From 9da74c1c6f2ad8f0bc9edef099a0505a1d1ac76e Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Thu, 7 Mar 2024 15:49:04 +0100
Subject: [PATCH] Use pathlib where possible
---
app/converters/sandpaper.py | 67 +++++++++-------------
app/corpora/cli.py | 19 ++++--
app/corpora/cqi_over_sio/extensions.py | 7 +--
app/corpora/files/routes.py | 5 +-
app/jobs/json_routes.py | 3 +-
app/jobs/routes.py | 9 ++-
app/main/cli.py | 20 +++----
app/models/__init__.py | 1 +
app/models/avatar.py | 10 ++--
app/models/corpus.py | 44 ++++++++------
app/models/corpus_file.py | 9 +--
app/models/job.py | 24 ++++----
app/models/job_input.py | 6 +-
app/models/job_result.py | 6 +-
app/models/spacy_nlp_pipeline_model.py | 30 ++++------
app/models/tesseract_ocr_pipeline_model.py | 27 ++++-----
app/models/user.py | 26 +++++----
app/services/__init__.py | 7 +--
app/users/cli.py | 2 -
app/users/routes.py | 5 +-
config.py | 4 +-
21 files changed, 164 insertions(+), 167 deletions(-)
diff --git a/app/converters/sandpaper.py b/app/converters/sandpaper.py
index 27f2bcc6..86deb8d0 100644
--- a/app/converters/sandpaper.py
+++ b/app/converters/sandpaper.py
@@ -2,80 +2,69 @@ from flask import current_app
from app import db
from app.models import User, Corpus, CorpusFile
from datetime import datetime
+from pathlib import Path
+from typing import Dict, List
import json
-import os
import shutil
class SandpaperConverter:
- def __init__(self, json_db_file, data_dir):
+ def __init__(self, json_db_file: Path, data_dir: Path):
self.json_db_file = json_db_file
self.data_dir = data_dir
def run(self):
- with open(self.json_db_file, 'r') as f:
- json_db = json.loads(f.read())
+ with self.json_db_file.open('r') as f:
+ json_db: List[Dict] = json.load(f)
for json_user in json_db:
if not json_user['confirmed']:
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
continue
- user_dir = os.path.join(self.data_dir, str(json_user['id']))
+ user_dir = self.data_dir / f'{json_user["id"]}'
self.convert_user(json_user, user_dir)
db.session.commit()
- def convert_user(self, json_user, user_dir):
+ def convert_user(self, json_user: Dict, user_dir: Path):
current_app.logger.info(f'Create User {json_user["username"]}...')
- user = User(
- confirmed=json_user['confirmed'],
- email=json_user['email'],
- last_seen=datetime.fromtimestamp(json_user['last_seen']),
- member_since=datetime.fromtimestamp(json_user['member_since']),
- password_hash=json_user['password_hash'], # TODO: Needs to be added manually
- username=json_user['username']
- )
- db.session.add(user)
- db.session.flush(objects=[user])
- db.session.refresh(user)
try:
- user.makedirs()
- except OSError as e:
- current_app.logger.error(e)
- db.session.rollback()
+ user = User.create(
+ confirmed=json_user['confirmed'],
+ email=json_user['email'],
+ last_seen=datetime.fromtimestamp(json_user['last_seen']),
+ member_since=datetime.fromtimestamp(json_user['member_since']),
+ password_hash=json_user['password_hash'], # TODO: Needs to be added manually
+ username=json_user['username']
+ )
+ except OSError:
raise Exception('Internal Server Error')
for json_corpus in json_user['corpora'].values():
if not json_corpus['files'].values():
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
continue
- corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
+ corpus_dir = user_dir / 'corpora' / f'{json_corpus["id"]}'
self.convert_corpus(json_corpus, user, corpus_dir)
current_app.logger.info('Done')
- def convert_corpus(self, json_corpus, user, corpus_dir):
+ def convert_corpus(self, json_corpus: Dict, user: User, corpus_dir: Path):
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
- corpus = Corpus(
- user=user,
- creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
- description=json_corpus['description'],
- title=json_corpus['title']
- )
- db.session.add(corpus)
- db.session.flush(objects=[corpus])
- db.session.refresh(corpus)
try:
- corpus.makedirs()
- except OSError as e:
- current_app.logger.error(e)
- db.session.rollback()
+ corpus = Corpus.create(
+ user=user,
+ creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
+ description=json_corpus['description'],
+ title=json_corpus['title']
+ )
+ except OSError:
raise Exception('Internal Server Error')
for json_corpus_file in json_corpus['files'].values():
self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
current_app.logger.info('Done')
- def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
+ def convert_corpus_file(self, json_corpus_file: Dict, corpus: Corpus, corpus_dir: Path):
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
corpus_file = CorpusFile(
corpus=corpus,
@@ -99,13 +88,13 @@ class SandpaperConverter:
db.session.refresh(corpus_file)
try:
shutil.copy2(
- os.path.join(corpus_dir, json_corpus_file['filename']),
+ corpus_dir / json_corpus_file['filename'],
corpus_file.path
)
except:
current_app.logger.warning(
'Can not convert corpus file: '
- f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
+ f'{corpus_dir / json_corpus_file["filename"]}'
' -> '
f'{corpus_file.path}'
)
diff --git a/app/corpora/cli.py b/app/corpora/cli.py
index 8c1a0970..2117cb6f 100644
--- a/app/corpora/cli.py
+++ b/app/corpora/cli.py
@@ -1,7 +1,7 @@
-from app.models import Corpus, CorpusStatus
-import os
+from flask import current_app
import shutil
from app import db
+from app.models import Corpus, CorpusStatus
from . import bp
@@ -18,10 +18,17 @@ def reset():
]
for corpus in [x for x in Corpus.query.all() if x.status in status]:
print(f'Resetting corpus {corpus}')
- shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
- os.mkdir(os.path.join(corpus.path, 'cwb'))
- os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
- os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
+ corpus_cwb_dir = corpus.path / 'cwb'
+ corpus_cwb_data_dir = corpus_cwb_dir / 'data'
+ corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
+ try:
+ shutil.rmtree(corpus_cwb_dir, ignore_errors=True)  # drop any previous cwb tree
+ corpus_cwb_dir.mkdir()  # recreate the empty cwb/data and cwb/registry layout
+ corpus_cwb_data_dir.mkdir()
+ corpus_cwb_registry_dir.mkdir()
+ except OSError as e:
+ current_app.logger.error(e)
+ raise
corpus.status = CorpusStatus.UNPREPARED
corpus.num_analysis_sessions = 0
db.session.commit()
diff --git a/app/corpora/cqi_over_sio/extensions.py b/app/corpora/cqi_over_sio/extensions.py
index 6748b963..a5401f27 100644
--- a/app/corpora/cqi_over_sio/extensions.py
+++ b/app/corpora/cqi_over_sio/extensions.py
@@ -12,7 +12,6 @@ from typing import Dict, List
import gzip
import json
import math
-import os
from app import db
from app.models import Corpus
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
@@ -42,9 +41,9 @@ def ext_corpus_static_data(corpus: str) -> Dict:
db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
db_corpus: Corpus = Corpus.query.get(db_corpus_id)
- static_data_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
- if os.path.exists(static_data_file_path):
- with open(static_data_file_path, 'rb') as f:
+ static_data_file_path = db_corpus.path / 'cwb' / 'static.json.gz'
+ if static_data_file_path.exists():
+ with static_data_file_path.open('rb') as f:
return f.read()
cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
diff --git a/app/corpora/files/routes.py b/app/corpora/files/routes.py
index e5ad094d..a5a696f6 100644
--- a/app/corpora/files/routes.py
+++ b/app/corpora/files/routes.py
@@ -7,7 +7,6 @@ from flask import (
url_for
)
from flask_breadcrumbs import register_breadcrumb
-import os
from app import db
from app.models import Corpus, CorpusFile, CorpusStatus
from ..decorators import corpus_follower_permission_required
@@ -92,8 +91,8 @@ def corpus_file(corpus_id, corpus_file_id):
def download_corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
return send_from_directory(
- os.path.dirname(corpus_file.path),
- os.path.basename(corpus_file.path),
+ corpus_file.path.parent,
+ corpus_file.path.name,
as_attachment=True,
attachment_filename=corpus_file.filename,
mimetype=corpus_file.mimetype
diff --git a/app/jobs/json_routes.py b/app/jobs/json_routes.py
index 9f1e1b2f..28849e98 100644
--- a/app/jobs/json_routes.py
+++ b/app/jobs/json_routes.py
@@ -1,7 +1,6 @@
from flask import abort, current_app
from flask_login import current_user
from threading import Thread
-import os
from app import db
from app.decorators import admin_required, content_negotiation
from app.models import Job, JobStatus
@@ -39,7 +38,7 @@ def job_log(job_id):
if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
response = {'errors': {'message': 'Job status is not completed or failed'}}
return response, 409
- with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
+ with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file:
log = log_file.read()
response_data = {
'jobLog': log
diff --git a/app/jobs/routes.py b/app/jobs/routes.py
index f0480293..ba3f8c92 100644
--- a/app/jobs/routes.py
+++ b/app/jobs/routes.py
@@ -7,7 +7,6 @@ from flask import (
)
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
-import os
from app.models import Job, JobInput, JobResult
from . import bp
from .utils import job_dynamic_list_constructor as job_dlc
@@ -38,8 +37,8 @@ def download_job_input(job_id, job_input_id):
if not (job_input.job.user == current_user or current_user.is_administrator()):
abort(403)
return send_from_directory(
- os.path.dirname(job_input.path),
- os.path.basename(job_input.path),
+ job_input.path.parent,
+ job_input.path.name,
as_attachment=True,
attachment_filename=job_input.filename,
mimetype=job_input.mimetype
@@ -52,8 +51,8 @@ def download_job_result(job_id, job_result_id):
if not (job_result.job.user == current_user or current_user.is_administrator()):
abort(403)
return send_from_directory(
- os.path.dirname(job_result.path),
- os.path.basename(job_result.path),
+ job_result.path.parent,
+ job_result.path.name,
as_attachment=True,
attachment_filename=job_result.filename,
mimetype=job_result.mimetype
diff --git a/app/main/cli.py b/app/main/cli.py
index 45fabf38..cb9cab55 100644
--- a/app/main/cli.py
+++ b/app/main/cli.py
@@ -1,6 +1,7 @@
from flask import current_app
from flask_migrate import upgrade
-import os
+from pathlib import Path
+from typing import List
from app.models import (
CorpusFollowerRole,
Role,
@@ -17,16 +18,15 @@ def deploy():
# Make default directories
print('Make default directories')
base_dir = current_app.config['NOPAQUE_DATA_DIR']
- default_dirs = [
- os.path.join(base_dir, 'tmp'),
- os.path.join(base_dir, 'users')
+ default_dirs: List[Path] = [
+ base_dir / 'tmp',
+ base_dir / 'users'
]
- for dir in default_dirs:
- if os.path.exists(dir):
- if not os.path.isdir(dir):
- raise NotADirectoryError(f'{dir} is not a directory')
- else:
- os.mkdir(dir)
+ for default_dir in default_dirs:
+ if not default_dir.exists():
+ default_dir.mkdir()
+ if not default_dir.is_dir():
+ raise NotADirectoryError(f'{default_dir} is not a directory')
# migrate database to latest revision
print('Migrate database to latest revision')
diff --git a/app/models/__init__.py b/app/models/__init__.py
index 2ff20306..639fd278 100644
--- a/app/models/__init__.py
+++ b/app/models/__init__.py
@@ -11,6 +11,7 @@ from .spacy_nlp_pipeline_model import *
from .tesseract_ocr_pipeline_model import *
from .token import *
from .user import *
+from app import login
@login.user_loader
diff --git a/app/models/avatar.py b/app/models/avatar.py
index c8f67fdf..f361eb45 100644
--- a/app/models/avatar.py
+++ b/app/models/avatar.py
@@ -1,6 +1,6 @@
from flask import current_app
from flask_hashids import HashidMixin
-import os
+from pathlib import Path
from app import db
from .file_mixin import FileMixin
@@ -15,14 +15,16 @@ class Avatar(HashidMixin, FileMixin, db.Model):
user = db.relationship('User', back_populates='avatar')
@property
- def path(self):
- return os.path.join(self.user.path, 'avatar')
+ def path(self) -> Path:
+ # The avatar is stored as the entry named 'avatar' directly under the owning user's path.
+ return self.user.path / 'avatar'
def delete(self):
try:
- os.remove(self.path)
+ self.path.unlink(missing_ok=True)
except OSError as e:
current_app.logger.error(e)
+ raise
db.session.delete(self)
def to_json_serializeable(self, backrefs=False, relationships=False):
diff --git a/app/models/corpus.py b/app/models/corpus.py
index 32003461..1d541413 100644
--- a/app/models/corpus.py
+++ b/app/models/corpus.py
@@ -4,7 +4,7 @@ from flask import current_app, url_for
from flask_hashids import HashidMixin
from sqlalchemy.ext.associationproxy import association_proxy
from typing import Union
-import os
+from pathlib import Path
import shutil
import xml.etree.ElementTree as ET
from app import db
@@ -88,8 +88,8 @@ class Corpus(HashidMixin, db.Model):
return f'{self.user.jsonpatch_path}/corpora/{self.hashid}'
@property
- def path(self):
- return os.path.join(self.user.path, 'corpora', str(self.id))
+ def path(self) -> Path:
+ return self.user.path / 'corpora' / f'{self.id}'
@property
def url(self):
@@ -105,27 +105,39 @@ class Corpus(HashidMixin, db.Model):
db.session.add(corpus)
db.session.flush(objects=[corpus])
db.session.refresh(corpus)
+ corpus_files_dir = corpus.path / 'files'
+ corpus_cwb_dir = corpus.path / 'cwb'
+ corpus_cwb_data_dir = corpus_cwb_dir / 'data'
+ corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
try:
- os.mkdir(corpus.path)
- os.mkdir(os.path.join(corpus.path, 'files'))
- os.mkdir(os.path.join(corpus.path, 'cwb'))
- os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
- os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
+ corpus.path.mkdir()
+ corpus_files_dir.mkdir()
+ corpus_cwb_dir.mkdir()
+ corpus_cwb_data_dir.mkdir()
+ corpus_cwb_registry_dir.mkdir()
except OSError as e:
+ # TODO: Potential leftover cleanup
current_app.logger.error(e)
db.session.rollback()
- raise e
+ raise
return corpus
def build(self):
- build_dir = os.path.join(self.path, 'cwb')
- shutil.rmtree(build_dir, ignore_errors=True)
- os.mkdir(build_dir)
- os.mkdir(os.path.join(build_dir, 'data'))
- os.mkdir(os.path.join(build_dir, 'registry'))
+ corpus_cwb_dir = self.path / 'cwb'
+ corpus_cwb_data_dir = corpus_cwb_dir / 'data'
+ corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
+ try:
+ shutil.rmtree(corpus_cwb_dir, ignore_errors=True)
+ corpus_cwb_dir.mkdir()
+ corpus_cwb_data_dir.mkdir()
+ corpus_cwb_registry_dir.mkdir()
+ except OSError as e:
+ current_app.logger.error(e)
+ self.status = CorpusStatus.FAILED
+ raise
corpus_element = ET.fromstring('\n')
for corpus_file in self.files:
- normalized_vrt_path = os.path.join(build_dir, f'{corpus_file.id}.norm.vrt')
+ normalized_vrt_path = corpus_cwb_dir / f'{corpus_file.id}.norm.vrt'
try:
normalize_vrt_file(corpus_file.path, normalized_vrt_path)
except:
@@ -152,7 +164,7 @@ class Corpus(HashidMixin, db.Model):
# corpus_element.insert(1, text_element)
corpus_element.append(text_element)
ET.ElementTree(corpus_element).write(
- os.path.join(build_dir, 'corpus.vrt'),
+ corpus_cwb_dir / 'corpus.vrt',
encoding='utf-8'
)
self.status = CorpusStatus.SUBMITTED
diff --git a/app/models/corpus_file.py b/app/models/corpus_file.py
index f785dc8f..566ad39f 100644
--- a/app/models/corpus_file.py
+++ b/app/models/corpus_file.py
@@ -1,6 +1,6 @@
from flask import current_app, url_for
from flask_hashids import HashidMixin
-import os
+from pathlib import Path
from app import db
from .corpus import CorpusStatus
from .file_mixin import FileMixin
@@ -45,8 +45,8 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
return f'{self.corpus.jsonpatch_path}/files/{self.hashid}'
@property
- def path(self):
- return os.path.join(self.corpus.path, 'files', str(self.id))
+ def path(self) -> Path:
+ return self.corpus.path / 'files' / f'{self.id}'
@property
def url(self):
@@ -66,9 +66,10 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
def delete(self):
try:
- os.remove(self.path)
+ self.path.unlink(missing_ok=True)
except OSError as e:
current_app.logger.error(e)
+ raise
db.session.delete(self)
self.corpus.status = CorpusStatus.UNPREPARED
diff --git a/app/models/job.py b/app/models/job.py
index 39b30c12..daa043c5 100644
--- a/app/models/job.py
+++ b/app/models/job.py
@@ -4,7 +4,7 @@ from flask import current_app, url_for
from flask_hashids import HashidMixin
from time import sleep
from typing import Union
-import os
+from pathlib import Path
import shutil
from app import db
from app.ext.flask_sqlalchemy import ContainerColumn, IntEnumColumn
@@ -79,8 +79,8 @@ class Job(HashidMixin, db.Model):
return f'{self.user.jsonpatch_path}/jobs/{self.hashid}'
@property
- def path(self):
- return os.path.join(self.user.path, 'jobs', str(self.id))
+ def path(self) -> Path:
+ return self.user.path / 'jobs' / f'{self.id}'
@property
def url(self):
@@ -96,15 +96,19 @@ class Job(HashidMixin, db.Model):
db.session.add(job)
db.session.flush(objects=[job])
db.session.refresh(job)
+ job_inputs_dir = job.path / 'inputs'
+ job_pipeline_data_dir = job.path / 'pipeline_data'
+ job_results_dir = job.path / 'results'
try:
- os.mkdir(job.path)
- os.mkdir(os.path.join(job.path, 'inputs'))
- os.mkdir(os.path.join(job.path, 'pipeline_data'))
- os.mkdir(os.path.join(job.path, 'results'))
+ job.path.mkdir()
+ job_inputs_dir.mkdir()
+ job_pipeline_data_dir.mkdir()
+ job_results_dir.mkdir()
except OSError as e:
+ # TODO: Potential leftover cleanup
current_app.logger.error(e)
db.session.rollback()
- raise e
+ raise
return job
def delete(self):
@@ -131,8 +135,8 @@ class Job(HashidMixin, db.Model):
''' Restart a job - only if the status is failed '''
if self.status != JobStatus.FAILED:
raise Exception('Job status is not "failed"')
- shutil.rmtree(os.path.join(self.path, 'results'), ignore_errors=True)
- shutil.rmtree(os.path.join(self.path, 'pyflow.data'), ignore_errors=True)
+ shutil.rmtree(self.path / 'results', ignore_errors=True)
+ shutil.rmtree(self.path / 'pyflow.data', ignore_errors=True)
for result in self.results:
db.session.delete(result)
self.end_date = None
diff --git a/app/models/job_input.py b/app/models/job_input.py
index c3ec9c5a..8405a92b 100644
--- a/app/models/job_input.py
+++ b/app/models/job_input.py
@@ -1,6 +1,6 @@
from flask import url_for
from flask_hashids import HashidMixin
-import os
+from pathlib import Path
from app import db
from .file_mixin import FileMixin
@@ -33,8 +33,8 @@ class JobInput(FileMixin, HashidMixin, db.Model):
return f'{self.job.jsonpatch_path}/inputs/{self.hashid}'
@property
- def path(self):
- return os.path.join(self.job.path, 'inputs', str(self.id))
+ def path(self) -> Path:
+ return self.job.path / 'inputs' / f'{self.id}'
@property
def url(self):
diff --git a/app/models/job_result.py b/app/models/job_result.py
index c99b07fb..b0c9c1e3 100644
--- a/app/models/job_result.py
+++ b/app/models/job_result.py
@@ -1,6 +1,6 @@
from flask import url_for
from flask_hashids import HashidMixin
-import os
+from pathlib import Path
from app import db
from .file_mixin import FileMixin
@@ -35,8 +35,8 @@ class JobResult(FileMixin, HashidMixin, db.Model):
return f'{self.job.jsonpatch_path}/results/{self.hashid}'
@property
- def path(self):
- return os.path.join(self.job.path, 'results', str(self.id))
+ def path(self) -> Path:
+ return self.job.path / 'results' / f'{self.id}'
@property
def url(self):
diff --git a/app/models/spacy_nlp_pipeline_model.py b/app/models/spacy_nlp_pipeline_model.py
index 127526b7..4cea0d3f 100644
--- a/app/models/spacy_nlp_pipeline_model.py
+++ b/app/models/spacy_nlp_pipeline_model.py
@@ -1,8 +1,7 @@
-from flask import abort, current_app, url_for
+from flask import current_app, url_for
from flask_hashids import HashidMixin
-from time import sleep
from tqdm import tqdm
-import os
+from pathlib import Path
import requests
import yaml
from app import db
@@ -32,12 +31,8 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
user = db.relationship('User', back_populates='spacy_nlp_pipeline_models')
@property
- def path(self):
- return os.path.join(
- self.user.path,
- 'spacy_nlp_pipeline_models',
- str(self.id)
- )
+ def path(self) -> Path:
+ return self.user.path / 'spacy_nlp_pipeline_models' / f'{self.id}'
@property
def jsonpatch_path(self):
@@ -57,14 +52,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
@staticmethod
def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first()
- defaults_file = os.path.join(
- os.path.dirname(os.path.abspath(__file__)),
- 'default_records',
- 'spacy_nlp_pipeline_model.yml'
- )
- with open(defaults_file, 'r') as f:
- defaults = yaml.safe_load(f)
- for m in defaults:
+ default_records_file = Path(__file__).parent / 'default_records' / 'spacy_nlp_pipeline_model.yml'
+ with default_records_file.open('r') as f:
+ default_records = yaml.safe_load(f)
+ for m in default_records:
model = SpaCyNLPPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
@@ -96,7 +87,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
db.session.add(model)
db.session.flush(objects=[model])
db.session.refresh(model)
- if not os.path.exists(model.path) or force_download:
+ if not model.path.exists() or force_download:
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
@@ -116,9 +107,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
def delete(self):
try:
- os.remove(self.path)
+ self.path.unlink(missing_ok=True)
except OSError as e:
current_app.logger.error(e)
+ raise
db.session.delete(self)
def to_json_serializeable(self, backrefs=False, relationships=False):
diff --git a/app/models/tesseract_ocr_pipeline_model.py b/app/models/tesseract_ocr_pipeline_model.py
index 4e8e9550..20f5feee 100644
--- a/app/models/tesseract_ocr_pipeline_model.py
+++ b/app/models/tesseract_ocr_pipeline_model.py
@@ -1,7 +1,7 @@
from flask import current_app, url_for
from flask_hashids import HashidMixin
from tqdm import tqdm
-import os
+from pathlib import Path
import requests
import yaml
from app import db
@@ -30,12 +30,8 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
user = db.relationship('User', back_populates='tesseract_ocr_pipeline_models')
@property
- def path(self):
- return os.path.join(
- self.user.path,
- 'tesseract_ocr_pipeline_models',
- str(self.id)
- )
+ def path(self) -> Path:
+ return self.user.path / 'tesseract_ocr_pipeline_models' / f'{self.id}'
@property
def jsonpatch_path(self):
@@ -55,14 +51,10 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
@staticmethod
def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first()
- defaults_file = os.path.join(
- os.path.dirname(os.path.abspath(__file__)),
- 'default_records',
- 'tesseract_ocr_pipeline_model.yml'
- )
- with open(defaults_file, 'r') as f:
- defaults = yaml.safe_load(f)
- for m in defaults:
+ default_records_file = Path(__file__).parent / 'default_records' / 'tesseract_ocr_pipeline_model.yml'
+ with default_records_file.open('r') as f:
+ default_records = yaml.safe_load(f)
+ for m in default_records:
model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
@@ -92,7 +84,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
db.session.flush(objects=[model])
db.session.refresh(model)
model.filename = f'{model.id}.traineddata'
- if not os.path.exists(model.path) or force_download:
+ if not model.path.exists() or force_download:
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
@@ -112,9 +104,10 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
def delete(self):
try:
- os.remove(self.path)
+ self.path.unlink(missing_ok=True)
except OSError as e:
current_app.logger.error(e)
+ raise
db.session.delete(self)
def to_json_serializeable(self, backrefs=False, relationships=False):
diff --git a/app/models/user.py b/app/models/user.py
index a3b21198..8ba96b14 100644
--- a/app/models/user.py
+++ b/app/models/user.py
@@ -4,14 +4,14 @@ from flask import current_app, url_for
from flask_hashids import HashidMixin
from flask_login import UserMixin
from sqlalchemy.ext.associationproxy import association_proxy
+from pathlib import Path
from typing import Union
from werkzeug.security import generate_password_hash, check_password_hash
import jwt
-import os
import re
import secrets
import shutil
-from app import db, hashids, login
+from app import db, hashids
from app.ext.flask_sqlalchemy import IntEnumColumn
from .corpus import Corpus
from .corpus_follower_association import CorpusFollowerAssociation
@@ -145,9 +145,8 @@ class User(HashidMixin, UserMixin, db.Model):
self.password_hash = generate_password_hash(password)
@property
- def path(self):
- return os.path.join(
- current_app.config.get('NOPAQUE_DATA_DIR'), 'users', str(self.id))
+ def path(self) -> Path:
+ return current_app.config.get('NOPAQUE_DATA_DIR') / 'users' / f'{self.id}'
@staticmethod
def create(**kwargs):
@@ -155,16 +154,21 @@ class User(HashidMixin, UserMixin, db.Model):
db.session.add(user)
db.session.flush(objects=[user])
db.session.refresh(user)
+ user_spacy_nlp_pipeline_models_dir = user.path / 'spacy_nlp_pipeline_models'
+ user_tesseract_ocr_pipeline_models_dir = user.path / 'tesseract_ocr_pipeline_models'
+ user_corpora_dir = user.path / 'corpora'
+ user_jobs_dir = user.path / 'jobs'
try:
- os.mkdir(user.path)
- os.mkdir(os.path.join(user.path, 'spacy_nlp_pipeline_models'))
- os.mkdir(os.path.join(user.path, 'tesseract_ocr_pipeline_models'))
- os.mkdir(os.path.join(user.path, 'corpora'))
- os.mkdir(os.path.join(user.path, 'jobs'))
+ user.path.mkdir()
+ user_spacy_nlp_pipeline_models_dir.mkdir()
+ user_tesseract_ocr_pipeline_models_dir.mkdir()
+ user_corpora_dir.mkdir()
+ user_jobs_dir.mkdir()
except OSError as e:
+ # TODO: Potential leftover cleanup
current_app.logger.error(e)
db.session.rollback()
- raise e
+ raise
return user
@staticmethod
diff --git a/app/services/__init__.py b/app/services/__init__.py
index 25955e3d..bb204103 100644
--- a/app/services/__init__.py
+++ b/app/services/__init__.py
@@ -1,12 +1,11 @@
from flask import Blueprint
from flask_login import login_required
-import os
+from pathlib import Path
import yaml
-services_file = \
- os.path.join(os.path.dirname(os.path.abspath(__file__)), 'services.yml')
-with open(services_file, 'r') as f:
+services_file = Path(__file__).parent / 'services.yml'
+with services_file.open('r') as f:
SERVICES = yaml.safe_load(f)
bp = Blueprint('services', __name__)
diff --git a/app/users/cli.py b/app/users/cli.py
index 6aebca47..a0b474e8 100644
--- a/app/users/cli.py
+++ b/app/users/cli.py
@@ -1,6 +1,4 @@
from app.models import User
-import os
-import shutil
from app import db
from . import bp
diff --git a/app/users/routes.py b/app/users/routes.py
index fbb5a609..1d4cb468 100644
--- a/app/users/routes.py
+++ b/app/users/routes.py
@@ -7,7 +7,6 @@ from flask import (
)
from flask_breadcrumbs import register_breadcrumb
from flask_login import current_user
-import os
from app.models import User
from . import bp
from .utils import user_dynamic_list_constructor as user_dlc
@@ -40,8 +39,8 @@ def user_avatar(user_id):
if user.avatar is None:
return redirect(url_for('static', filename='images/user_avatar.png'))
return send_from_directory(
- os.path.dirname(user.avatar.path),
- os.path.basename(user.avatar.path),
+ user.avatar.path.parent,
+ user.avatar.path.name,
as_attachment=True,
attachment_filename=user.avatar.filename,
mimetype=user.avatar.mimetype
diff --git a/config.py b/config.py
index daafd37f..5f3d4f28 100644
--- a/config.py
+++ b/config.py
@@ -1,6 +1,7 @@
from dotenv import load_dotenv
from flask import Flask
from logging.handlers import RotatingFileHandler
+from pathlib import Path
from werkzeug.middleware.proxy_fix import ProxyFix
import logging
import os
@@ -57,8 +58,7 @@ class Config:
''' # nopaque # '''
NOPAQUE_ADMIN = os.environ.get('NOPAQUE_ADMIN')
- NOPAQUE_DATA_DIR = \
- os.path.abspath(os.environ.get('NOPAQUE_DATA_PATH', '/mnt/nopaque'))
+ NOPAQUE_DATA_DIR = Path(os.path.abspath(os.environ.get('NOPAQUE_DATA_PATH', '/mnt/nopaque')))  # stay absolute, as before the pathlib switch
NOPAQUE_IS_PRIMARY_INSTANCE = \
os.environ.get('NOPAQUE_IS_PRIMARY_INSTANCE', 'true').lower() == 'true'
NOPAQUE_MAIL_SUBJECT_PREFIX = '[nopaque]'