Use pathlib where possible

This commit is contained in:
Patrick Jentsch
2024-03-07 15:49:04 +01:00
parent ec23bd94ee
commit 9da74c1c6f
21 changed files with 164 additions and 167 deletions

@ -11,6 +11,7 @@ from .spacy_nlp_pipeline_model import *
from .tesseract_ocr_pipeline_model import *
from .token import *
from .user import *
from app import login
@login.user_loader

@ -1,6 +1,6 @@
from flask import current_app
from flask_hashids import HashidMixin
import os
from pathlib import Path
from app import db
from .file_mixin import FileMixin
@ -15,14 +15,16 @@ class Avatar(HashidMixin, FileMixin, db.Model):
user = db.relationship('User', back_populates='avatar')
@property
def path(self) -> Path:
    """Return the filesystem location of this avatar.

    The avatar is stored under the fixed name ``avatar`` directly inside
    the directory of the user it belongs to (``self.user.path``).
    """
    return self.user.path / 'avatar'
def delete(self):
    """Remove the avatar file from disk and mark the record for deletion.

    A file that is already missing is not treated as an error
    (``missing_ok=True``). Any other ``OSError`` is logged and re-raised,
    in which case the database record is left untouched.

    Raises:
        OSError: If the file exists but cannot be removed.
    """
    try:
        self.path.unlink(missing_ok=True)
    except OSError as e:
        current_app.logger.error(e)
        raise
    db.session.delete(self)
def to_json_serializeable(self, backrefs=False, relationships=False):

@ -4,7 +4,7 @@ from flask import current_app, url_for
from flask_hashids import HashidMixin
from sqlalchemy.ext.associationproxy import association_proxy
from typing import Union
import os
from pathlib import Path
import shutil
import xml.etree.ElementTree as ET
from app import db
@ -88,8 +88,8 @@ class Corpus(HashidMixin, db.Model):
return f'{self.user.jsonpatch_path}/corpora/{self.hashid}'
@property
def path(self) -> Path:
    """Return the data directory of this corpus.

    The directory is named after the corpus id and lives in the
    ``corpora`` subdirectory of the owning user's directory.
    """
    return self.user.path / 'corpora' / f'{self.id}'
@property
def url(self):
@ -105,27 +105,39 @@ class Corpus(HashidMixin, db.Model):
db.session.add(corpus)
db.session.flush(objects=[corpus])
db.session.refresh(corpus)
corpus_files_dir = corpus.path / 'files'
corpus_cwb_dir = corpus.path / 'cwb'
corpus_cwb_data_dir = corpus_cwb_dir / 'data'
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
try:
os.mkdir(corpus.path)
os.mkdir(os.path.join(corpus.path, 'files'))
os.mkdir(os.path.join(corpus.path, 'cwb'))
os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
corpus.path.mkdir()
corpus_files_dir.mkdir()
corpus_cwb_dir.mkdir()
corpus_cwb_data_dir.mkdir()
corpus_cwb_registry_dir.mkdir()
except OSError as e:
# TODO: Potential leftover cleanup
current_app.logger.error(e)
db.session.rollback()
raise e
raise
return corpus
def build(self):
build_dir = os.path.join(self.path, 'cwb')
shutil.rmtree(build_dir, ignore_errors=True)
os.mkdir(build_dir)
os.mkdir(os.path.join(build_dir, 'data'))
os.mkdir(os.path.join(build_dir, 'registry'))
corpus_cwb_dir = self.path / 'cwb'
corpus_cwb_data_dir = corpus_cwb_dir / 'data'
corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
try:
shutil.rmtree(corpus_cwb_dir, ignore_errors=True)
corpus_cwb_dir.mkdir()
corpus_cwb_data_dir.mkdir()
corpus_cwb_registry_dir.mkdir()
except OSError as e:
current_app.logger.error(e)
self.status = CorpusStatus.FAILED
raise
corpus_element = ET.fromstring('<corpus>\n</corpus>')
for corpus_file in self.files:
normalized_vrt_path = os.path.join(build_dir, f'{corpus_file.id}.norm.vrt')
normalized_vrt_path = corpus_cwb_dir / f'{corpus_file.id}.norm.vrt'
try:
normalize_vrt_file(corpus_file.path, normalized_vrt_path)
except:
@ -152,7 +164,7 @@ class Corpus(HashidMixin, db.Model):
# corpus_element.insert(1, text_element)
corpus_element.append(text_element)
ET.ElementTree(corpus_element).write(
os.path.join(build_dir, 'corpus.vrt'),
corpus_cwb_dir / 'corpus.vrt',
encoding='utf-8'
)
self.status = CorpusStatus.SUBMITTED

@ -1,6 +1,6 @@
from flask import current_app, url_for
from flask_hashids import HashidMixin
import os
from pathlib import Path
from app import db
from .corpus import CorpusStatus
from .file_mixin import FileMixin
@ -45,8 +45,8 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
return f'{self.corpus.jsonpatch_path}/files/{self.hashid}'
@property
def path(self) -> Path:
    """Return the filesystem location of this corpus file.

    The file is named after its id and lives in the ``files``
    subdirectory of the corpus it belongs to.
    """
    return self.corpus.path / 'files' / f'{self.id}'
@property
def url(self):
@ -66,9 +66,10 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
def delete(self):
    """Remove the corpus file from disk and mark the record for deletion.

    A file that is already missing is not treated as an error
    (``missing_ok=True``). Any other ``OSError`` is logged and re-raised,
    in which case neither the record nor the corpus status is changed.

    Raises:
        OSError: If the file exists but cannot be removed.
    """
    try:
        self.path.unlink(missing_ok=True)
    except OSError as e:
        current_app.logger.error(e)
        raise
    db.session.delete(self)
    # NOTE(review): presumably the corpus must be rebuilt once one of its
    # files is gone, hence the reset to UNPREPARED - confirm.
    self.corpus.status = CorpusStatus.UNPREPARED

@ -4,7 +4,7 @@ from flask import current_app, url_for
from flask_hashids import HashidMixin
from time import sleep
from typing import Union
import os
from pathlib import Path
import shutil
from app import db
from app.ext.flask_sqlalchemy import ContainerColumn, IntEnumColumn
@ -79,8 +79,8 @@ class Job(HashidMixin, db.Model):
return f'{self.user.jsonpatch_path}/jobs/{self.hashid}'
@property
def path(self) -> Path:
    """Return the data directory of this job.

    The directory is named after the job id and lives in the ``jobs``
    subdirectory of the owning user's directory.
    """
    return self.user.path / 'jobs' / f'{self.id}'
@property
def url(self):
@ -96,15 +96,19 @@ class Job(HashidMixin, db.Model):
db.session.add(job)
db.session.flush(objects=[job])
db.session.refresh(job)
job_inputs_dir = job.path / 'inputs'
job_pipeline_data_dir = job.path / 'pipeline_data'
job_results_dir = job.path / 'results'
try:
os.mkdir(job.path)
os.mkdir(os.path.join(job.path, 'inputs'))
os.mkdir(os.path.join(job.path, 'pipeline_data'))
os.mkdir(os.path.join(job.path, 'results'))
job.path.mkdir()
job_inputs_dir.mkdir()
job_pipeline_data_dir.mkdir()
job_results_dir.mkdir()
except OSError as e:
# TODO: Potential leftover cleanup
current_app.logger.error(e)
db.session.rollback()
raise e
raise
return job
def delete(self):
@ -131,8 +135,8 @@ class Job(HashidMixin, db.Model):
''' Restart a job - only if the status is failed '''
if self.status != JobStatus.FAILED:
raise Exception('Job status is not "failed"')
shutil.rmtree(os.path.join(self.path, 'results'), ignore_errors=True)
shutil.rmtree(os.path.join(self.path, 'pyflow.data'), ignore_errors=True)
shutil.rmtree(self.path / 'results', ignore_errors=True)
shutil.rmtree(self.path / 'pyflow.data', ignore_errors=True)
for result in self.results:
db.session.delete(result)
self.end_date = None

@ -1,6 +1,6 @@
from flask import url_for
from flask_hashids import HashidMixin
import os
from pathlib import Path
from app import db
from .file_mixin import FileMixin
@ -33,8 +33,8 @@ class JobInput(FileMixin, HashidMixin, db.Model):
return f'{self.job.jsonpatch_path}/inputs/{self.hashid}'
@property
def path(self) -> Path:
    """Return the filesystem location of this job input.

    The file is named after its id and lives in the ``inputs``
    subdirectory of the job it belongs to.
    """
    return self.job.path / 'inputs' / f'{self.id}'
@property
def url(self):

@ -1,6 +1,6 @@
from flask import url_for
from flask_hashids import HashidMixin
import os
from pathlib import Path
from app import db
from .file_mixin import FileMixin
@ -35,8 +35,8 @@ class JobResult(FileMixin, HashidMixin, db.Model):
return f'{self.job.jsonpatch_path}/results/{self.hashid}'
@property
def path(self) -> Path:
    """Return the filesystem location of this job result.

    The file is named after its id and lives in the ``results``
    subdirectory of the job it belongs to.
    """
    return self.job.path / 'results' / f'{self.id}'
@property
def url(self):

@ -1,8 +1,7 @@
from flask import abort, current_app, url_for
from flask import current_app, url_for
from flask_hashids import HashidMixin
from time import sleep
from tqdm import tqdm
import os
from pathlib import Path
import requests
import yaml
from app import db
@ -32,12 +31,8 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
user = db.relationship('User', back_populates='spacy_nlp_pipeline_models')
@property
def path(self) -> Path:
    """Return the filesystem location of this spaCy NLP pipeline model.

    The file is named after the model id and lives in the
    ``spacy_nlp_pipeline_models`` subdirectory of the owning user's
    directory.
    """
    return self.user.path / 'spacy_nlp_pipeline_models' / f'{self.id}'
@property
def jsonpatch_path(self):
@ -57,14 +52,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
@staticmethod
def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
'default_records',
'spacy_nlp_pipeline_model.yml'
)
with open(defaults_file, 'r') as f:
defaults = yaml.safe_load(f)
for m in defaults:
default_records_file = Path(__file__).parent / 'default_records' / 'spacy_nlp_pipeline_model.yml'
with default_records_file.open('r') as f:
default_records = yaml.safe_load(f)
for m in default_records:
model = SpaCyNLPPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
@ -96,7 +87,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
db.session.add(model)
db.session.flush(objects=[model])
db.session.refresh(model)
if not os.path.exists(model.path) or force_download:
if not model.path.exists() or force_download:
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
@ -116,9 +107,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
def delete(self):
    """Remove the model file from disk and mark the record for deletion.

    A file that is already missing is not treated as an error
    (``missing_ok=True``). Any other ``OSError`` is logged and re-raised,
    in which case the database record is left untouched.

    Raises:
        OSError: If the file exists but cannot be removed.
    """
    try:
        self.path.unlink(missing_ok=True)
    except OSError as e:
        current_app.logger.error(e)
        raise
    db.session.delete(self)
def to_json_serializeable(self, backrefs=False, relationships=False):

@ -1,7 +1,7 @@
from flask import current_app, url_for
from flask_hashids import HashidMixin
from tqdm import tqdm
import os
from pathlib import Path
import requests
import yaml
from app import db
@ -30,12 +30,8 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
user = db.relationship('User', back_populates='tesseract_ocr_pipeline_models')
@property
def path(self) -> Path:
    """Return the filesystem location of this Tesseract OCR pipeline model.

    The file is named after the model id and lives in the
    ``tesseract_ocr_pipeline_models`` subdirectory of the owning user's
    directory.
    """
    return self.user.path / 'tesseract_ocr_pipeline_models' / f'{self.id}'
@property
def jsonpatch_path(self):
@ -55,14 +51,10 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
@staticmethod
def insert_defaults(force_download=False):
nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
'default_records',
'tesseract_ocr_pipeline_model.yml'
)
with open(defaults_file, 'r') as f:
defaults = yaml.safe_load(f)
for m in defaults:
default_records_file = Path(__file__).parent / 'default_records' / 'tesseract_ocr_pipeline_model.yml'
with default_records_file.open('r') as f:
default_records = yaml.safe_load(f)
for m in default_records:
model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first() # noqa
if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
@ -92,7 +84,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
db.session.flush(objects=[model])
db.session.refresh(model)
model.filename = f'{model.id}.traineddata'
if not os.path.exists(model.path) or force_download:
if not model.path.exists() or force_download:
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
@ -112,9 +104,10 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
def delete(self):
    """Remove the model file from disk and mark the record for deletion.

    A file that is already missing is not treated as an error
    (``missing_ok=True``). Any other ``OSError`` is logged and re-raised,
    in which case the database record is left untouched.

    Raises:
        OSError: If the file exists but cannot be removed.
    """
    try:
        self.path.unlink(missing_ok=True)
    except OSError as e:
        current_app.logger.error(e)
        raise
    db.session.delete(self)
def to_json_serializeable(self, backrefs=False, relationships=False):

@ -4,14 +4,14 @@ from flask import current_app, url_for
from flask_hashids import HashidMixin
from flask_login import UserMixin
from sqlalchemy.ext.associationproxy import association_proxy
from pathlib import Path
from typing import Union
from werkzeug.security import generate_password_hash, check_password_hash
import jwt
import os
import re
import secrets
import shutil
from app import db, hashids, login
from app import db, hashids
from app.ext.flask_sqlalchemy import IntEnumColumn
from .corpus import Corpus
from .corpus_follower_association import CorpusFollowerAssociation
@ -145,9 +145,8 @@ class User(HashidMixin, UserMixin, db.Model):
self.password_hash = generate_password_hash(password)
@property
def path(self) -> Path:
    """Return the data directory of this user.

    The directory is named after the user id and lives in the ``users``
    subdirectory of the configured ``NOPAQUE_DATA_DIR``.
    """
    # Wrap the config value in Path() so the `/` operator also works when
    # NOPAQUE_DATA_DIR is stored as a plain string (e.g. read from the
    # environment); an existing Path passes through unchanged.
    data_dir = Path(current_app.config.get('NOPAQUE_DATA_DIR'))
    return data_dir / 'users' / f'{self.id}'
@staticmethod
def create(**kwargs):
@ -155,16 +154,21 @@ class User(HashidMixin, UserMixin, db.Model):
db.session.add(user)
db.session.flush(objects=[user])
db.session.refresh(user)
user_spacy_nlp_pipeline_models_dir = user.path / 'spacy_nlp_pipeline_models'
user_tesseract_ocr_pipeline_models_dir = user.path / 'tesseract_ocr_pipeline_models'
user_corpora_dir = user.path / 'corpora'
user_jobs_dir = user.path / 'jobs'
try:
os.mkdir(user.path)
os.mkdir(os.path.join(user.path, 'spacy_nlp_pipeline_models'))
os.mkdir(os.path.join(user.path, 'tesseract_ocr_pipeline_models'))
os.mkdir(os.path.join(user.path, 'corpora'))
os.mkdir(os.path.join(user.path, 'jobs'))
user.path.mkdir()
user_spacy_nlp_pipeline_models_dir.mkdir()
user_tesseract_ocr_pipeline_models_dir.mkdir()
user_corpora_dir.mkdir()
user_jobs_dir.mkdir()
except OSError as e:
# TODO: Potential leftover cleanup
current_app.logger.error(e)
db.session.rollback()
raise e
raise
return user
@staticmethod