From 9da74c1c6f2ad8f0bc9edef099a0505a1d1ac76e Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Thu, 7 Mar 2024 15:49:04 +0100 Subject: [PATCH] Use pathlib where possible --- app/converters/sandpaper.py | 67 +++++++++------------- app/corpora/cli.py | 19 ++++-- app/corpora/cqi_over_sio/extensions.py | 7 +-- app/corpora/files/routes.py | 5 +- app/jobs/json_routes.py | 3 +- app/jobs/routes.py | 9 ++- app/main/cli.py | 20 +++---- app/models/__init__.py | 1 + app/models/avatar.py | 10 ++-- app/models/corpus.py | 44 ++++++++------ app/models/corpus_file.py | 9 +-- app/models/job.py | 24 ++++---- app/models/job_input.py | 6 +- app/models/job_result.py | 6 +- app/models/spacy_nlp_pipeline_model.py | 30 ++++------ app/models/tesseract_ocr_pipeline_model.py | 27 ++++----- app/models/user.py | 26 +++++---- app/services/__init__.py | 7 +-- app/users/cli.py | 2 - app/users/routes.py | 5 +- config.py | 4 +- 21 files changed, 164 insertions(+), 167 deletions(-) diff --git a/app/converters/sandpaper.py b/app/converters/sandpaper.py index 27f2bcc6..86deb8d0 100644 --- a/app/converters/sandpaper.py +++ b/app/converters/sandpaper.py @@ -2,80 +2,69 @@ from flask import current_app from app import db from app.models import User, Corpus, CorpusFile from datetime import datetime +from pathlib import Path +from typing import Dict, List import json -import os import shutil class SandpaperConverter: - def __init__(self, json_db_file, data_dir): + def __init__(self, json_db_file: Path, data_dir: Path): self.json_db_file = json_db_file self.data_dir = data_dir def run(self): - with open(self.json_db_file, 'r') as f: - json_db = json.loads(f.read()) + with self.json_db_file.open('r') as f: + json_db: List[Dict] = json.load(f) for json_user in json_db: if not json_user['confirmed']: current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}') continue - user_dir = os.path.join(self.data_dir, str(json_user['id'])) + user_dir = self.data_dir / f'{json_user["id"]}' self.convert_user(json_user, user_dir) db.session.commit() - def convert_user(self, json_user, user_dir): + def convert_user(self, json_user: Dict, user_dir: Path): current_app.logger.info(f'Create User {json_user["username"]}...') - user = User( - confirmed=json_user['confirmed'], - email=json_user['email'], - last_seen=datetime.fromtimestamp(json_user['last_seen']), - member_since=datetime.fromtimestamp(json_user['member_since']), - password_hash=json_user['password_hash'], # TODO: Needs to be added manually - username=json_user['username'] - ) - db.session.add(user) - db.session.flush(objects=[user]) - db.session.refresh(user) try: - user.makedirs() - except OSError as e: - current_app.logger.error(e) - db.session.rollback() + user = User.create( + confirmed=json_user['confirmed'], + email=json_user['email'], + last_seen=datetime.fromtimestamp(json_user['last_seen']), + member_since=datetime.fromtimestamp(json_user['member_since']), + password_hash=json_user['password_hash'], # TODO: Needs to be added manually + username=json_user['username'] + ) + except OSError: raise Exception('Internal Server Error') for json_corpus in json_user['corpora'].values(): if not json_corpus['files'].values(): current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}') continue - corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id'])) + corpus_dir = user_dir / 'corpora' / f'{json_corpus["id"]}' self.convert_corpus(json_corpus, user, corpus_dir) current_app.logger.info('Done') - def convert_corpus(self, json_corpus, user, corpus_dir): + def convert_corpus(self, json_corpus: Dict, user: User, corpus_dir: Path): current_app.logger.info(f'Create Corpus {json_corpus["title"]}...') - corpus = Corpus( - user=user, - creation_date=datetime.fromtimestamp(json_corpus['creation_date']), - description=json_corpus['description'], - title=json_corpus['title'] - ) - db.session.add(corpus) - db.session.flush(objects=[corpus]) - db.session.refresh(corpus) try: - corpus.makedirs() - except OSError as e: - current_app.logger.error(e) - db.session.rollback() + corpus = Corpus.create( + user=user, + creation_date=datetime.fromtimestamp(json_corpus['creation_date']), + description=json_corpus['description'], + title=json_corpus['title'] + ) + except OSError: raise Exception('Internal Server Error') for json_corpus_file in json_corpus['files'].values(): self.convert_corpus_file(json_corpus_file, corpus, corpus_dir) current_app.logger.info('Done') - def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir): + def convert_corpus_file(self, json_corpus_file: Dict, corpus: Corpus, corpus_dir: Path): current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...') corpus_file = CorpusFile( corpus=corpus, @@ -99,13 +88,13 @@ class SandpaperConverter: db.session.refresh(corpus_file) try: shutil.copy2( - os.path.join(corpus_dir, json_corpus_file['filename']), + corpus_dir / json_corpus_file['filename'], corpus_file.path ) except: current_app.logger.warning( 'Can not convert corpus file: ' - f'{os.path.join(corpus_dir, json_corpus_file["filename"])}' + f'{corpus_dir / json_corpus_file["filename"]}' ' -> ' f'{corpus_file.path}' ) diff --git a/app/corpora/cli.py b/app/corpora/cli.py index 8c1a0970..2117cb6f 100644 --- a/app/corpora/cli.py +++ b/app/corpora/cli.py @@ -1,7 +1,7 @@ -from app.models import Corpus, CorpusStatus -import os +from flask import current_app import shutil from app import db +from app.models import Corpus, CorpusStatus from . import bp @@ -18,10 +18,17 @@ def reset(): ] for corpus in [x for x in Corpus.query.all() if x.status in status]: print(f'Resetting corpus {corpus}') - shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True) - os.mkdir(os.path.join(corpus.path, 'cwb')) - os.mkdir(os.path.join(corpus.path, 'cwb', 'data')) - os.mkdir(os.path.join(corpus.path, 'cwb', 'registry')) + corpus_cwb_dir = corpus.path / 'cwb' + corpus_cwb_data_dir = corpus_cwb_dir / 'data' + corpus_cwb_registry_dir = corpus_cwb_dir / 'registry' + try: + shutil.rmtree(corpus.path / 'cwb', ignore_errors=True) + corpus_cwb_dir.mkdir() + corpus_cwb_data_dir.mkdir() + corpus_cwb_registry_dir.mkdir() + except OSError as e: + current_app.logger.error(e) + raise corpus.status = CorpusStatus.UNPREPARED corpus.num_analysis_sessions = 0 db.session.commit() diff --git a/app/corpora/cqi_over_sio/extensions.py b/app/corpora/cqi_over_sio/extensions.py index 6748b963..a5401f27 100644 --- a/app/corpora/cqi_over_sio/extensions.py +++ b/app/corpora/cqi_over_sio/extensions.py @@ -12,7 +12,6 @@ from typing import Dict, List import gzip import json import math -import os from app import db from app.models import Corpus from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus @@ -42,9 +41,9 @@ def ext_corpus_static_data(corpus: str) -> Dict: db_corpus_id: int = session['cqi_over_sio']['db_corpus_id'] db_corpus: Corpus = Corpus.query.get(db_corpus_id) - static_data_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz') - if os.path.exists(static_data_file_path): - with open(static_data_file_path, 'rb') as f: + static_data_file_path = db_corpus.path / 'cwb' / 'static.json.gz' + if static_data_file_path.exists(): + with static_data_file_path.open('rb') as f: return f.read() cqi_client: CQiClient = session['cqi_over_sio']['cqi_client'] diff --git a/app/corpora/files/routes.py b/app/corpora/files/routes.py index e5ad094d..a5a696f6 100644 --- a/app/corpora/files/routes.py +++ b/app/corpora/files/routes.py @@ -7,7 +7,6 @@ from flask import ( url_for ) from flask_breadcrumbs import register_breadcrumb -import os from app import db from app.models import Corpus, CorpusFile, CorpusStatus from ..decorators import corpus_follower_permission_required @@ -92,8 +91,8 @@ def corpus_file(corpus_id, corpus_file_id): def download_corpus_file(corpus_id, corpus_file_id): corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404() return send_from_directory( - os.path.dirname(corpus_file.path), - os.path.basename(corpus_file.path), + corpus_file.path.parent, + corpus_file.path.name, as_attachment=True, attachment_filename=corpus_file.filename, mimetype=corpus_file.mimetype diff --git a/app/jobs/json_routes.py b/app/jobs/json_routes.py index 9f1e1b2f..28849e98 100644 --- a/app/jobs/json_routes.py +++ b/app/jobs/json_routes.py @@ -1,7 +1,6 @@ from flask import abort, current_app from flask_login import current_user from threading import Thread -import os from app import db from app.decorators import admin_required, content_negotiation from app.models import Job, JobStatus @@ -39,7 +38,7 @@ def job_log(job_id): if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]: response = {'errors': {'message': 'Job status is not completed or failed'}} return response, 409 - with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file: + with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file: log = log_file.read() response_data = { 'jobLog': log diff --git a/app/jobs/routes.py b/app/jobs/routes.py index f0480293..ba3f8c92 100644 --- a/app/jobs/routes.py +++ b/app/jobs/routes.py @@ -7,7 +7,6 @@ from flask import ( ) from flask_breadcrumbs import register_breadcrumb from flask_login import current_user -import os from app.models import Job, JobInput, JobResult from . import bp from .utils import job_dynamic_list_constructor as job_dlc @@ -38,8 +37,8 @@ def download_job_input(job_id, job_input_id): if not (job_input.job.user == current_user or current_user.is_administrator()): abort(403) return send_from_directory( - os.path.dirname(job_input.path), - os.path.basename(job_input.path), + job_input.path.parent, + job_input.path.name, as_attachment=True, attachment_filename=job_input.filename, mimetype=job_input.mimetype @@ -52,8 +51,8 @@ def download_job_result(job_id, job_result_id): if not (job_result.job.user == current_user or current_user.is_administrator()): abort(403) return send_from_directory( - os.path.dirname(job_result.path), - os.path.basename(job_result.path), + job_result.path.parent, + job_result.path.name, as_attachment=True, attachment_filename=job_result.filename, mimetype=job_result.mimetype diff --git a/app/main/cli.py b/app/main/cli.py index 45fabf38..cb9cab55 100644 --- a/app/main/cli.py +++ b/app/main/cli.py @@ -1,6 +1,7 @@ from flask import current_app from flask_migrate import upgrade -import os +from pathlib import Path +from typing import List from app.models import ( CorpusFollowerRole, Role, @@ -17,16 +18,15 @@ def deploy(): # Make default directories print('Make default directories') base_dir = current_app.config['NOPAQUE_DATA_DIR'] - default_dirs = [ - os.path.join(base_dir, 'tmp'), - os.path.join(base_dir, 'users') + default_dirs: List[Path] = [ + base_dir / 'tmp', + base_dir / 'users' ] - for dir in default_dirs: - if os.path.exists(dir): - if not os.path.isdir(dir): - raise NotADirectoryError(f'{dir} is not a directory') - else: - os.mkdir(dir) + for default_dir in default_dirs: + if not default_dir.exists(): + default_dir.mkdir() + if not default_dir.is_dir(): + raise NotADirectoryError(f'{default_dir} is not a directory') # migrate database to latest revision print('Migrate database to latest revision') diff --git a/app/models/__init__.py b/app/models/__init__.py index 2ff20306..639fd278 100644 --- a/app/models/__init__.py +++ b/app/models/__init__.py @@ -11,6 +11,7 @@ from .spacy_nlp_pipeline_model import * from .tesseract_ocr_pipeline_model import * from .token import * from .user import * +from app import login @login.user_loader diff --git a/app/models/avatar.py b/app/models/avatar.py index c8f67fdf..f361eb45 100644 --- a/app/models/avatar.py +++ b/app/models/avatar.py @@ -1,6 +1,6 @@ from flask import current_app from flask_hashids import HashidMixin -import os +from pathlib import Path from app import db from .file_mixin import FileMixin @@ -15,14 +15,16 @@ class Avatar(HashidMixin, FileMixin, db.Model): user = db.relationship('User', back_populates='avatar') @property - def path(self): - return os.path.join(self.user.path, 'avatar') + def path(self) -> Path: + return self.user.path / 'avatar' + # return os.path.join(self.user.path, 'avatar') def delete(self): try: - os.remove(self.path) + self.path.unlink(missing_ok=True) except OSError as e: current_app.logger.error(e) + raise db.session.delete(self) def to_json_serializeable(self, backrefs=False, relationships=False): diff --git a/app/models/corpus.py b/app/models/corpus.py index 32003461..1d541413 100644 --- a/app/models/corpus.py +++ b/app/models/corpus.py @@ -4,7 +4,7 @@ from flask import current_app, url_for from flask_hashids import HashidMixin from sqlalchemy.ext.associationproxy import association_proxy from typing import Union -import os +from pathlib import Path import shutil import xml.etree.ElementTree as ET from app import db @@ -88,8 +88,8 @@ class Corpus(HashidMixin, db.Model): return f'{self.user.jsonpatch_path}/corpora/{self.hashid}' @property - def path(self): - return os.path.join(self.user.path, 'corpora', str(self.id)) + def path(self) -> Path: + return self.user.path / 'corpora' / f'{self.id}' @property def url(self): @@ -105,27 +105,39 @@ class Corpus(HashidMixin, db.Model): db.session.add(corpus) db.session.flush(objects=[corpus]) db.session.refresh(corpus) + corpus_files_dir = corpus.path / 'files' + corpus_cwb_dir = corpus.path / 'cwb' + corpus_cwb_data_dir = corpus_cwb_dir / 'data' + corpus_cwb_registry_dir = corpus_cwb_dir / 'registry' try: - os.mkdir(corpus.path) - os.mkdir(os.path.join(corpus.path, 'files')) - os.mkdir(os.path.join(corpus.path, 'cwb')) - os.mkdir(os.path.join(corpus.path, 'cwb', 'data')) - os.mkdir(os.path.join(corpus.path, 'cwb', 'registry')) + corpus.path.mkdir() + corpus_files_dir.mkdir() + corpus_cwb_dir.mkdir() + corpus_cwb_data_dir.mkdir() + corpus_cwb_registry_dir.mkdir() except OSError as e: + # TODO: Potential leftover cleanup current_app.logger.error(e) db.session.rollback() - raise e + raise return corpus def build(self): - build_dir = os.path.join(self.path, 'cwb') - shutil.rmtree(build_dir, ignore_errors=True) - os.mkdir(build_dir) - os.mkdir(os.path.join(build_dir, 'data')) - os.mkdir(os.path.join(build_dir, 'registry')) + corpus_cwb_dir = self.path / 'cwb' + corpus_cwb_data_dir = corpus_cwb_dir / 'data' + corpus_cwb_registry_dir = corpus_cwb_dir / 'registry' + try: + shutil.rmtree(corpus_cwb_dir, ignore_errors=True) + corpus_cwb_dir.mkdir() + corpus_cwb_data_dir.mkdir() + corpus_cwb_registry_dir.mkdir() + except OSError as e: + current_app.logger.error(e) + self.status = CorpusStatus.FAILED + raise corpus_element = ET.fromstring('\n') for corpus_file in self.files: - normalized_vrt_path = os.path.join(build_dir, f'{corpus_file.id}.norm.vrt') + normalized_vrt_path = corpus_cwb_dir / f'{corpus_file.id}.norm.vrt' try: normalize_vrt_file(corpus_file.path, normalized_vrt_path) except: @@ -152,7 +164,7 @@ class Corpus(HashidMixin, db.Model): # corpus_element.insert(1, text_element) corpus_element.append(text_element) ET.ElementTree(corpus_element).write( - os.path.join(build_dir, 'corpus.vrt'), + corpus_cwb_dir / 'corpus.vrt', encoding='utf-8' ) self.status = CorpusStatus.SUBMITTED diff --git a/app/models/corpus_file.py b/app/models/corpus_file.py index f785dc8f..566ad39f 100644 --- a/app/models/corpus_file.py +++ b/app/models/corpus_file.py @@ -1,6 +1,6 @@ from flask import current_app, url_for from flask_hashids import HashidMixin -import os +from pathlib import Path from app import db from .corpus import CorpusStatus from .file_mixin import FileMixin @@ -45,8 +45,8 @@ class CorpusFile(FileMixin, HashidMixin, db.Model): return f'{self.corpus.jsonpatch_path}/files/{self.hashid}' @property - def path(self): - return os.path.join(self.corpus.path, 'files', str(self.id)) + def path(self) -> Path: + return self.corpus.path / 'files' / f'{self.id}' @property def url(self): @@ -66,9 +66,10 @@ class CorpusFile(FileMixin, HashidMixin, db.Model): def delete(self): try: - os.remove(self.path) + self.path.unlink(missing_ok=True) except OSError as e: current_app.logger.error(e) + raise db.session.delete(self) self.corpus.status = CorpusStatus.UNPREPARED diff --git a/app/models/job.py b/app/models/job.py index 39b30c12..daa043c5 100644 --- a/app/models/job.py +++ b/app/models/job.py @@ -4,7 +4,7 @@ from flask import current_app, url_for from flask_hashids import HashidMixin from time import sleep from typing import Union -import os +from pathlib import Path import shutil from app import db from app.ext.flask_sqlalchemy import ContainerColumn, IntEnumColumn @@ -79,8 +79,8 @@ class Job(HashidMixin, db.Model): return f'{self.user.jsonpatch_path}/jobs/{self.hashid}' @property - def path(self): - return os.path.join(self.user.path, 'jobs', str(self.id)) + def path(self) -> Path: + return self.user.path / 'jobs' / f'{self.id}' @property def url(self): @@ -96,15 +96,19 @@ class Job(HashidMixin, db.Model): db.session.add(job) db.session.flush(objects=[job]) db.session.refresh(job) + job_inputs_dir = job.path / 'inputs' + job_pipeline_data_dir = job.path / 'pipeline_data' + job_results_dir = job.path / 'results' try: - os.mkdir(job.path) - os.mkdir(os.path.join(job.path, 'inputs')) - os.mkdir(os.path.join(job.path, 'pipeline_data')) - os.mkdir(os.path.join(job.path, 'results')) + job.path.mkdir() + job_inputs_dir.mkdir() + job_pipeline_data_dir.mkdir() + job_results_dir.mkdir() except OSError as e: + # TODO: Potential leftover cleanup current_app.logger.error(e) db.session.rollback() - raise e + raise return job def delete(self): @@ -131,8 +135,8 @@ class Job(HashidMixin, db.Model): ''' Restart a job - only if the status is failed ''' if self.status != JobStatus.FAILED: raise Exception('Job status is not "failed"') - shutil.rmtree(os.path.join(self.path, 'results'), ignore_errors=True) - shutil.rmtree(os.path.join(self.path, 'pyflow.data'), ignore_errors=True) + shutil.rmtree(self.path / 'results', ignore_errors=True) + shutil.rmtree(self.path / 'pyflow.data', ignore_errors=True) for result in self.results: db.session.delete(result) self.end_date = None diff --git a/app/models/job_input.py b/app/models/job_input.py index c3ec9c5a..8405a92b 100644 --- a/app/models/job_input.py +++ b/app/models/job_input.py @@ -1,6 +1,6 @@ from flask import url_for from flask_hashids import HashidMixin -import os +from pathlib import Path from app import db from .file_mixin import FileMixin @@ -33,8 +33,8 @@ class JobInput(FileMixin, HashidMixin, db.Model): return f'{self.job.jsonpatch_path}/inputs/{self.hashid}' @property - def path(self): - return os.path.join(self.job.path, 'inputs', str(self.id)) + def path(self) -> Path: + return self.job.path / 'inputs' / f'{self.id}' @property def url(self): diff --git a/app/models/job_result.py b/app/models/job_result.py index c99b07fb..b0c9c1e3 100644 --- a/app/models/job_result.py +++ b/app/models/job_result.py @@ -1,6 +1,6 @@ from flask import url_for from flask_hashids import HashidMixin -import os +from pathlib import Path from app import db from .file_mixin import FileMixin @@ -35,8 +35,8 @@ class JobResult(FileMixin, HashidMixin, db.Model): return f'{self.job.jsonpatch_path}/results/{self.hashid}' @property - def path(self): - return os.path.join(self.job.path, 'results', str(self.id)) + def path(self) -> Path: + return self.job.path / 'results' / f'{self.id}' @property def url(self): diff --git a/app/models/spacy_nlp_pipeline_model.py b/app/models/spacy_nlp_pipeline_model.py index 127526b7..4cea0d3f 100644 --- a/app/models/spacy_nlp_pipeline_model.py +++ b/app/models/spacy_nlp_pipeline_model.py @@ -1,8 +1,7 @@ -from flask import abort, current_app, url_for +from flask import current_app, url_for from flask_hashids import HashidMixin -from time import sleep from tqdm import tqdm -import os +from pathlib import Path import requests import yaml from app import db @@ -32,12 +31,8 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): user = db.relationship('User', back_populates='spacy_nlp_pipeline_models') @property - def path(self): - return os.path.join( - self.user.path, - 'spacy_nlp_pipeline_models', - str(self.id) - ) + def path(self) -> Path: + return self.user.path / 'spacy_nlp_pipeline_models' / f'{self.id}' @property def jsonpatch_path(self): @@ -57,14 +52,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): @staticmethod def insert_defaults(force_download=False): nopaque_user = User.query.filter_by(username='nopaque').first() - defaults_file = os.path.join( - os.path.dirname(os.path.abspath(__file__)), - 'default_records', - 'spacy_nlp_pipeline_model.yml' - ) - with open(defaults_file, 'r') as f: - defaults = yaml.safe_load(f) - for m in defaults: + default_records_file = Path(__file__).parent / 'default_records' / 'spacy_nlp_pipeline_model.yml' + with default_records_file.open('r') as f: + default_records = yaml.safe_load(f) + for m in default_records: model = SpaCyNLPPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa if model is not None: model.compatible_service_versions = m['compatible_service_versions'] @@ -96,7 +87,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): db.session.add(model) db.session.flush(objects=[model]) db.session.refresh(model) - if not os.path.exists(model.path) or force_download: + if not model.path.exists() or force_download: r = requests.get(m['url'], stream=True) pbar = tqdm( desc=f'{model.title} ({model.filename})', @@ -116,9 +107,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): def delete(self): try: - os.remove(self.path) + self.path.unlink(missing_ok=True) except OSError as e: current_app.logger.error(e) + raise db.session.delete(self) def to_json_serializeable(self, backrefs=False, relationships=False): diff --git a/app/models/tesseract_ocr_pipeline_model.py b/app/models/tesseract_ocr_pipeline_model.py index 4e8e9550..20f5feee 100644 --- a/app/models/tesseract_ocr_pipeline_model.py +++ b/app/models/tesseract_ocr_pipeline_model.py @@ -1,7 +1,7 @@ from flask import current_app, url_for from flask_hashids import HashidMixin from tqdm import tqdm -import os +from pathlib import Path import requests import yaml from app import db @@ -30,12 +30,8 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model): user = db.relationship('User', back_populates='tesseract_ocr_pipeline_models') @property - def path(self): - return os.path.join( - self.user.path, - 'tesseract_ocr_pipeline_models', - str(self.id) - ) + def path(self) -> Path: + return self.user.path / 'tesseract_ocr_pipeline_models' / f'{self.id}' @property def jsonpatch_path(self): @@ -55,14 +51,10 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model): @staticmethod def insert_defaults(force_download=False): nopaque_user = User.query.filter_by(username='nopaque').first() - defaults_file = os.path.join( - os.path.dirname(os.path.abspath(__file__)), - 'default_records', - 'tesseract_ocr_pipeline_model.yml' - ) - with open(defaults_file, 'r') as f: - defaults = yaml.safe_load(f) - for m in defaults: + default_records_file = Path(__file__).parent / 'default_records' / 'tesseract_ocr_pipeline_model.yml' + with default_records_file.open('r') as f: + default_records = yaml.safe_load(f) + for m in default_records: model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa if model is not None: model.compatible_service_versions = m['compatible_service_versions'] @@ -92,7 +84,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model): db.session.flush(objects=[model]) db.session.refresh(model) model.filename = f'{model.id}.traineddata' - if not os.path.exists(model.path) or force_download: + if not model.path.exists() or force_download: r = requests.get(m['url'], stream=True) pbar = tqdm( desc=f'{model.title} ({model.filename})', @@ -112,9 +104,10 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model): def delete(self): try: - os.remove(self.path) + self.path.unlink(missing_ok=True) except OSError as e: current_app.logger.error(e) + raise db.session.delete(self) def to_json_serializeable(self, backrefs=False, relationships=False): diff --git a/app/models/user.py b/app/models/user.py index a3b21198..8ba96b14 100644 --- a/app/models/user.py +++ b/app/models/user.py @@ -4,14 +4,14 @@ from flask import current_app, url_for from flask_hashids import HashidMixin from flask_login import UserMixin from sqlalchemy.ext.associationproxy import association_proxy +from pathlib import Path from typing import Union from werkzeug.security import generate_password_hash, check_password_hash import jwt -import os import re import secrets import shutil -from app import db, hashids, login +from app import db, hashids from app.ext.flask_sqlalchemy import IntEnumColumn from .corpus import Corpus from .corpus_follower_association import CorpusFollowerAssociation @@ -145,9 +145,8 @@ class User(HashidMixin, UserMixin, db.Model): self.password_hash = generate_password_hash(password) @property - def path(self): - return os.path.join( - current_app.config.get('NOPAQUE_DATA_DIR'), 'users', str(self.id)) + def path(self) -> Path: + return current_app.config.get('NOPAQUE_DATA_DIR') / 'users' / f'{self.id}' @staticmethod def create(**kwargs): @@ -155,16 +154,21 @@ class User(HashidMixin, UserMixin, db.Model): db.session.add(user) db.session.flush(objects=[user]) db.session.refresh(user) + user_spacy_nlp_pipeline_models_dir = user.path / 'spacy_nlp_pipeline_models' + user_tesseract_ocr_pipeline_models_dir = user.path / 'tesseract_ocr_pipeline_models' + user_corpora_dir = user.path / 'corpora' + user_jobs_dir = user.path / 'jobs' try: - os.mkdir(user.path) - os.mkdir(os.path.join(user.path, 'spacy_nlp_pipeline_models')) - os.mkdir(os.path.join(user.path, 'tesseract_ocr_pipeline_models')) - os.mkdir(os.path.join(user.path, 'corpora')) - os.mkdir(os.path.join(user.path, 'jobs')) + user.path.mkdir() + user_spacy_nlp_pipeline_models_dir.mkdir() + user_tesseract_ocr_pipeline_models_dir.mkdir() + user_corpora_dir.mkdir() + user_jobs_dir.mkdir() except OSError as e: + # TODO: Potential leftover cleanup current_app.logger.error(e) db.session.rollback() - raise e + raise return user @staticmethod diff --git a/app/services/__init__.py b/app/services/__init__.py index 25955e3d..bb204103 100644 --- a/app/services/__init__.py +++ b/app/services/__init__.py @@ -1,12 +1,11 @@ from flask import Blueprint from flask_login import login_required -import os +from pathlib import Path import yaml -services_file = \ - os.path.join(os.path.dirname(os.path.abspath(__file__)), 'services.yml') -with open(services_file, 'r') as f: +services_file = Path(__file__).parent / 'services.yml' +with services_file.open('r') as f: SERVICES = yaml.safe_load(f) bp = Blueprint('services', __name__) diff --git a/app/users/cli.py b/app/users/cli.py index 6aebca47..a0b474e8 100644 --- a/app/users/cli.py +++ b/app/users/cli.py @@ -1,6 +1,4 @@ from app.models import User -import os -import shutil from app import db from . import bp diff --git a/app/users/routes.py b/app/users/routes.py index fbb5a609..1d4cb468 100644 --- a/app/users/routes.py +++ b/app/users/routes.py @@ -7,7 +7,6 @@ from flask import ( ) from flask_breadcrumbs import register_breadcrumb from flask_login import current_user -import os from app.models import User from . import bp from .utils import user_dynamic_list_constructor as user_dlc @@ -40,8 +39,8 @@ def user_avatar(user_id): if user.avatar is None: return redirect(url_for('static', filename='images/user_avatar.png')) return send_from_directory( - os.path.dirname(user.avatar.path), - os.path.basename(user.avatar.path), + user.avatar.path.parent, + user.avatar.path.name, as_attachment=True, attachment_filename=user.avatar.filename, mimetype=user.avatar.mimetype diff --git a/config.py b/config.py index daafd37f..5f3d4f28 100644 --- a/config.py +++ b/config.py @@ -1,6 +1,7 @@ from dotenv import load_dotenv from flask import Flask from logging.handlers import RotatingFileHandler +from pathlib import Path from werkzeug.middleware.proxy_fix import ProxyFix import logging import os @@ -57,8 +58,7 @@ class Config: ''' # nopaque # ''' NOPAQUE_ADMIN = os.environ.get('NOPAQUE_ADMIN') - NOPAQUE_DATA_DIR = \ - os.path.abspath(os.environ.get('NOPAQUE_DATA_PATH', '/mnt/nopaque')) + NOPAQUE_DATA_DIR = Path(os.environ.get('NOPAQUE_DATA_PATH', '/mnt/nopaque')) NOPAQUE_IS_PRIMARY_INSTANCE = \ os.environ.get('NOPAQUE_IS_PRIMARY_INSTANCE', 'true').lower() == 'true' NOPAQUE_MAIL_SUBJECT_PREFIX = '[nopaque]'