mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-04 04:12:45 +00:00 
			
		
		
		
	Use pathlib where possible
This commit is contained in:
		@@ -2,80 +2,69 @@ from flask import current_app
 | 
			
		||||
from app import db
 | 
			
		||||
from app.models import User, Corpus, CorpusFile
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from typing import Dict, List
 | 
			
		||||
import json
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SandpaperConverter:
 | 
			
		||||
    def __init__(self, json_db_file, data_dir):
 | 
			
		||||
    def __init__(self, json_db_file: Path, data_dir: Path):
 | 
			
		||||
        self.json_db_file = json_db_file
 | 
			
		||||
        self.data_dir = data_dir
 | 
			
		||||
 | 
			
		||||
    def run(self):
 | 
			
		||||
        with open(self.json_db_file, 'r') as f:
 | 
			
		||||
            json_db = json.loads(f.read())
 | 
			
		||||
        with self.json_db_file.open('r') as f:
 | 
			
		||||
            json_db: List[Dict] = json.load(f)
 | 
			
		||||
 | 
			
		||||
        for json_user in json_db:
 | 
			
		||||
            if not json_user['confirmed']:
 | 
			
		||||
                current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
 | 
			
		||||
                continue
 | 
			
		||||
            user_dir = os.path.join(self.data_dir, str(json_user['id']))
 | 
			
		||||
            user_dir = self.data_dir / f'{json_user["id"]}'
 | 
			
		||||
            self.convert_user(json_user, user_dir)
 | 
			
		||||
            db.session.commit()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def convert_user(self, json_user, user_dir):
 | 
			
		||||
    def convert_user(self, json_user: Dict, user_dir: Path):
 | 
			
		||||
        current_app.logger.info(f'Create User {json_user["username"]}...')
 | 
			
		||||
        user = User(
 | 
			
		||||
            confirmed=json_user['confirmed'],
 | 
			
		||||
            email=json_user['email'],
 | 
			
		||||
            last_seen=datetime.fromtimestamp(json_user['last_seen']),
 | 
			
		||||
            member_since=datetime.fromtimestamp(json_user['member_since']),
 | 
			
		||||
            password_hash=json_user['password_hash'],  # TODO: Needs to be added manually
 | 
			
		||||
            username=json_user['username']
 | 
			
		||||
        )
 | 
			
		||||
        db.session.add(user)
 | 
			
		||||
        db.session.flush(objects=[user])
 | 
			
		||||
        db.session.refresh(user)
 | 
			
		||||
        try:
 | 
			
		||||
            user.makedirs()
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            current_app.logger.error(e)
 | 
			
		||||
            db.session.rollback()
 | 
			
		||||
            user = User.create(
 | 
			
		||||
                confirmed=json_user['confirmed'],
 | 
			
		||||
                email=json_user['email'],
 | 
			
		||||
                last_seen=datetime.fromtimestamp(json_user['last_seen']),
 | 
			
		||||
                member_since=datetime.fromtimestamp(json_user['member_since']),
 | 
			
		||||
                password_hash=json_user['password_hash'],  # TODO: Needs to be added manually
 | 
			
		||||
                username=json_user['username']
 | 
			
		||||
            )
 | 
			
		||||
        except OSError:
 | 
			
		||||
            raise Exception('Internal Server Error')
 | 
			
		||||
        for json_corpus in json_user['corpora'].values():
 | 
			
		||||
            if not json_corpus['files'].values():
 | 
			
		||||
                current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
 | 
			
		||||
                continue
 | 
			
		||||
            corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
 | 
			
		||||
            corpus_dir = user_dir / 'corpora' / f'{json_corpus["id"]}'
 | 
			
		||||
            self.convert_corpus(json_corpus, user, corpus_dir)
 | 
			
		||||
        current_app.logger.info('Done')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def convert_corpus(self, json_corpus, user, corpus_dir):
 | 
			
		||||
    def convert_corpus(self, json_corpus: Dict, user: User, corpus_dir: Path):
 | 
			
		||||
        current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
 | 
			
		||||
        corpus = Corpus(
 | 
			
		||||
            user=user,
 | 
			
		||||
            creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
 | 
			
		||||
            description=json_corpus['description'],
 | 
			
		||||
            title=json_corpus['title']
 | 
			
		||||
        )
 | 
			
		||||
        db.session.add(corpus)
 | 
			
		||||
        db.session.flush(objects=[corpus])
 | 
			
		||||
        db.session.refresh(corpus)
 | 
			
		||||
        try:
 | 
			
		||||
            corpus.makedirs()
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            current_app.logger.error(e)
 | 
			
		||||
            db.session.rollback()
 | 
			
		||||
            corpus = Corpus.create(
 | 
			
		||||
                user=user,
 | 
			
		||||
                creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
 | 
			
		||||
                description=json_corpus['description'],
 | 
			
		||||
                title=json_corpus['title']
 | 
			
		||||
            )
 | 
			
		||||
        except OSError:
 | 
			
		||||
            raise Exception('Internal Server Error')
 | 
			
		||||
        for json_corpus_file in json_corpus['files'].values():
 | 
			
		||||
            self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
 | 
			
		||||
        current_app.logger.info('Done')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
 | 
			
		||||
    def convert_corpus_file(self, json_corpus_file: Dict, corpus: Corpus, corpus_dir: Path):
 | 
			
		||||
        current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
 | 
			
		||||
        corpus_file = CorpusFile(
 | 
			
		||||
            corpus=corpus,
 | 
			
		||||
@@ -99,13 +88,13 @@ class SandpaperConverter:
 | 
			
		||||
        db.session.refresh(corpus_file)
 | 
			
		||||
        try:
 | 
			
		||||
            shutil.copy2(
 | 
			
		||||
                os.path.join(corpus_dir, json_corpus_file['filename']),
 | 
			
		||||
                corpus_dir / json_corpus_file['filename'],
 | 
			
		||||
                corpus_file.path
 | 
			
		||||
            )
 | 
			
		||||
        except:
 | 
			
		||||
            current_app.logger.warning(
 | 
			
		||||
                'Can not convert corpus file: '
 | 
			
		||||
                f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
 | 
			
		||||
                f'{corpus_dir / json_corpus_file["filename"]}'
 | 
			
		||||
                ' -> '
 | 
			
		||||
                f'{corpus_file.path}'
 | 
			
		||||
            )
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
from app.models import Corpus, CorpusStatus
 | 
			
		||||
import os
 | 
			
		||||
from flask import current_app
 | 
			
		||||
import shutil
 | 
			
		||||
from app import db
 | 
			
		||||
from app.models import Corpus, CorpusStatus
 | 
			
		||||
from . import bp
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -18,10 +18,17 @@ def reset():
 | 
			
		||||
    ]
 | 
			
		||||
    for corpus in [x for x in Corpus.query.all() if x.status in status]:
 | 
			
		||||
        print(f'Resetting corpus {corpus}')
 | 
			
		||||
        shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
 | 
			
		||||
        os.mkdir(os.path.join(corpus.path, 'cwb'))
 | 
			
		||||
        os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
 | 
			
		||||
        os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
 | 
			
		||||
        corpus_cwb_dir = corpus.path / 'cwb'
 | 
			
		||||
        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
 | 
			
		||||
        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
 | 
			
		||||
        try:
 | 
			
		||||
            shutil.rmtree(corpus.path / 'cwb', ignore_errors=True)
 | 
			
		||||
            corpus_cwb_dir.mkdir()
 | 
			
		||||
            corpus_cwb_data_dir.mkdir()
 | 
			
		||||
            corpus_cwb_registry_dir.mkdir()
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            current_app.logger.error(e)
 | 
			
		||||
            raise
 | 
			
		||||
        corpus.status = CorpusStatus.UNPREPARED
 | 
			
		||||
        corpus.num_analysis_sessions = 0
 | 
			
		||||
    db.session.commit()
 | 
			
		||||
 
 | 
			
		||||
@@ -12,7 +12,6 @@ from typing import Dict, List
 | 
			
		||||
import gzip
 | 
			
		||||
import json
 | 
			
		||||
import math
 | 
			
		||||
import os
 | 
			
		||||
from app import db
 | 
			
		||||
from app.models import Corpus
 | 
			
		||||
from .utils import lookups_by_cpos, partial_export_subcorpus, export_subcorpus
 | 
			
		||||
@@ -42,9 +41,9 @@ def ext_corpus_static_data(corpus: str) -> Dict:
 | 
			
		||||
    db_corpus_id: int = session['cqi_over_sio']['db_corpus_id']
 | 
			
		||||
    db_corpus: Corpus = Corpus.query.get(db_corpus_id)
 | 
			
		||||
 | 
			
		||||
    static_data_file_path: str = os.path.join(db_corpus.path, 'cwb', 'static.json.gz')
 | 
			
		||||
    if os.path.exists(static_data_file_path):
 | 
			
		||||
        with open(static_data_file_path, 'rb') as f:
 | 
			
		||||
    static_data_file_path = db_corpus.path / 'cwb' / 'static.json.gz'
 | 
			
		||||
    if static_data_file_path.exists():
 | 
			
		||||
        with static_data_file_path.open('rb') as f:
 | 
			
		||||
            return f.read()
 | 
			
		||||
 | 
			
		||||
    cqi_client: CQiClient = session['cqi_over_sio']['cqi_client']
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,6 @@ from flask import (
 | 
			
		||||
    url_for
 | 
			
		||||
)
 | 
			
		||||
from flask_breadcrumbs import register_breadcrumb
 | 
			
		||||
import os
 | 
			
		||||
from app import db
 | 
			
		||||
from app.models import Corpus, CorpusFile, CorpusStatus
 | 
			
		||||
from ..decorators import corpus_follower_permission_required
 | 
			
		||||
@@ -92,8 +91,8 @@ def corpus_file(corpus_id, corpus_file_id):
 | 
			
		||||
def download_corpus_file(corpus_id, corpus_file_id):
 | 
			
		||||
    corpus_file = CorpusFile.query.filter_by(corpus_id=corpus_id, id=corpus_file_id).first_or_404()
 | 
			
		||||
    return send_from_directory(
 | 
			
		||||
        os.path.dirname(corpus_file.path),
 | 
			
		||||
        os.path.basename(corpus_file.path),
 | 
			
		||||
        corpus_file.path.parent,
 | 
			
		||||
        corpus_file.path.name,
 | 
			
		||||
        as_attachment=True,
 | 
			
		||||
        attachment_filename=corpus_file.filename,
 | 
			
		||||
        mimetype=corpus_file.mimetype
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,6 @@
 | 
			
		||||
from flask import abort, current_app
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from threading import Thread
 | 
			
		||||
import os
 | 
			
		||||
from app import db
 | 
			
		||||
from app.decorators import admin_required, content_negotiation
 | 
			
		||||
from app.models import Job, JobStatus
 | 
			
		||||
@@ -39,7 +38,7 @@ def job_log(job_id):
 | 
			
		||||
    if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
 | 
			
		||||
        response = {'errors': {'message': 'Job status is not completed or failed'}}
 | 
			
		||||
        return response, 409
 | 
			
		||||
    with open(os.path.join(job.path, 'pipeline_data', 'logs', 'pyflow_log.txt')) as log_file:
 | 
			
		||||
    with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file:
 | 
			
		||||
        log = log_file.read()
 | 
			
		||||
    response_data = {
 | 
			
		||||
        'jobLog': log
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,6 @@ from flask import (
 | 
			
		||||
)
 | 
			
		||||
from flask_breadcrumbs import register_breadcrumb
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
import os
 | 
			
		||||
from app.models import Job, JobInput, JobResult
 | 
			
		||||
from . import bp
 | 
			
		||||
from .utils import job_dynamic_list_constructor as job_dlc
 | 
			
		||||
@@ -38,8 +37,8 @@ def download_job_input(job_id, job_input_id):
 | 
			
		||||
    if not (job_input.job.user == current_user or current_user.is_administrator()):
 | 
			
		||||
        abort(403)
 | 
			
		||||
    return send_from_directory(
 | 
			
		||||
        os.path.dirname(job_input.path),
 | 
			
		||||
        os.path.basename(job_input.path),
 | 
			
		||||
        job_input.path.parent,
 | 
			
		||||
        job_input.path.name,
 | 
			
		||||
        as_attachment=True,
 | 
			
		||||
        attachment_filename=job_input.filename,
 | 
			
		||||
        mimetype=job_input.mimetype
 | 
			
		||||
@@ -52,8 +51,8 @@ def download_job_result(job_id, job_result_id):
 | 
			
		||||
    if not (job_result.job.user == current_user or current_user.is_administrator()):
 | 
			
		||||
        abort(403)
 | 
			
		||||
    return send_from_directory(
 | 
			
		||||
        os.path.dirname(job_result.path),
 | 
			
		||||
        os.path.basename(job_result.path),
 | 
			
		||||
        job_result.path.parent,
 | 
			
		||||
        job_result.path.name,
 | 
			
		||||
        as_attachment=True,
 | 
			
		||||
        attachment_filename=job_result.filename,
 | 
			
		||||
        mimetype=job_result.mimetype
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,7 @@
 | 
			
		||||
from flask import current_app
 | 
			
		||||
from flask_migrate import upgrade
 | 
			
		||||
import os
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from typing import List
 | 
			
		||||
from app.models import (
 | 
			
		||||
    CorpusFollowerRole,
 | 
			
		||||
    Role,
 | 
			
		||||
@@ -17,16 +18,15 @@ def deploy():
 | 
			
		||||
    # Make default directories
 | 
			
		||||
    print('Make default directories')
 | 
			
		||||
    base_dir = current_app.config['NOPAQUE_DATA_DIR']
 | 
			
		||||
    default_dirs = [
 | 
			
		||||
        os.path.join(base_dir, 'tmp'),
 | 
			
		||||
        os.path.join(base_dir, 'users')
 | 
			
		||||
    default_dirs: List[Path] = [
 | 
			
		||||
        base_dir / 'tmp',
 | 
			
		||||
        base_dir / 'users'
 | 
			
		||||
    ]
 | 
			
		||||
    for dir in default_dirs:
 | 
			
		||||
        if os.path.exists(dir):
 | 
			
		||||
            if not os.path.isdir(dir):
 | 
			
		||||
                raise NotADirectoryError(f'{dir} is not a directory')
 | 
			
		||||
        else:
 | 
			
		||||
            os.mkdir(dir)
 | 
			
		||||
    for default_dir in default_dirs:
 | 
			
		||||
        if not default_dir.exists():
 | 
			
		||||
            default_dir.mkdir()
 | 
			
		||||
        if not default_dir.is_dir():
 | 
			
		||||
            raise NotADirectoryError(f'{default_dir} is not a directory')
 | 
			
		||||
 | 
			
		||||
    # migrate database to latest revision
 | 
			
		||||
    print('Migrate database to latest revision')
 | 
			
		||||
 
 | 
			
		||||
@@ -11,6 +11,7 @@ from .spacy_nlp_pipeline_model import *
 | 
			
		||||
from .tesseract_ocr_pipeline_model import *
 | 
			
		||||
from .token import *
 | 
			
		||||
from .user import *
 | 
			
		||||
from app import login
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@login.user_loader
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
from flask import current_app
 | 
			
		||||
from flask_hashids import HashidMixin
 | 
			
		||||
import os
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from app import db
 | 
			
		||||
from .file_mixin import FileMixin
 | 
			
		||||
 | 
			
		||||
@@ -15,14 +15,16 @@ class Avatar(HashidMixin, FileMixin, db.Model):
 | 
			
		||||
    user = db.relationship('User', back_populates='avatar')
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def path(self):
 | 
			
		||||
        return os.path.join(self.user.path, 'avatar')
 | 
			
		||||
    def path(self) -> Path:
 | 
			
		||||
        return self.user.path / 'avatar'
 | 
			
		||||
        # return os.path.join(self.user.path, 'avatar')
 | 
			
		||||
 | 
			
		||||
    def delete(self):
 | 
			
		||||
        try:
 | 
			
		||||
            os.remove(self.path)
 | 
			
		||||
            self.path.unlink(missing_ok=True)
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            current_app.logger.error(e)
 | 
			
		||||
            raise
 | 
			
		||||
        db.session.delete(self)
 | 
			
		||||
 | 
			
		||||
    def to_json_serializeable(self, backrefs=False, relationships=False):
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@ from flask import current_app, url_for
 | 
			
		||||
from flask_hashids import HashidMixin
 | 
			
		||||
from sqlalchemy.ext.associationproxy import association_proxy
 | 
			
		||||
from typing import Union
 | 
			
		||||
import os
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
import shutil
 | 
			
		||||
import xml.etree.ElementTree as ET
 | 
			
		||||
from app import db
 | 
			
		||||
@@ -88,8 +88,8 @@ class Corpus(HashidMixin, db.Model):
 | 
			
		||||
        return f'{self.user.jsonpatch_path}/corpora/{self.hashid}'
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def path(self):
 | 
			
		||||
        return os.path.join(self.user.path, 'corpora', str(self.id))
 | 
			
		||||
    def path(self) -> Path:
 | 
			
		||||
        return self.user.path / 'corpora' / f'{self.id}'
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def url(self):
 | 
			
		||||
@@ -105,27 +105,39 @@ class Corpus(HashidMixin, db.Model):
 | 
			
		||||
        db.session.add(corpus)
 | 
			
		||||
        db.session.flush(objects=[corpus])
 | 
			
		||||
        db.session.refresh(corpus)
 | 
			
		||||
        corpus_files_dir = corpus.path / 'files'
 | 
			
		||||
        corpus_cwb_dir = corpus.path / 'cwb'
 | 
			
		||||
        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
 | 
			
		||||
        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
 | 
			
		||||
        try:
 | 
			
		||||
            os.mkdir(corpus.path)
 | 
			
		||||
            os.mkdir(os.path.join(corpus.path, 'files'))
 | 
			
		||||
            os.mkdir(os.path.join(corpus.path, 'cwb'))
 | 
			
		||||
            os.mkdir(os.path.join(corpus.path, 'cwb', 'data'))
 | 
			
		||||
            os.mkdir(os.path.join(corpus.path, 'cwb', 'registry'))
 | 
			
		||||
            corpus.path.mkdir()
 | 
			
		||||
            corpus_files_dir.mkdir()
 | 
			
		||||
            corpus_cwb_dir.mkdir()
 | 
			
		||||
            corpus_cwb_data_dir.mkdir()
 | 
			
		||||
            corpus_cwb_registry_dir.mkdir()
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            # TODO: Potential leftover cleanup
 | 
			
		||||
            current_app.logger.error(e)
 | 
			
		||||
            db.session.rollback()
 | 
			
		||||
            raise e
 | 
			
		||||
            raise
 | 
			
		||||
        return corpus
 | 
			
		||||
 | 
			
		||||
    def build(self):
 | 
			
		||||
        build_dir = os.path.join(self.path, 'cwb')
 | 
			
		||||
        shutil.rmtree(build_dir, ignore_errors=True)
 | 
			
		||||
        os.mkdir(build_dir)
 | 
			
		||||
        os.mkdir(os.path.join(build_dir, 'data'))
 | 
			
		||||
        os.mkdir(os.path.join(build_dir, 'registry'))
 | 
			
		||||
        corpus_cwb_dir = self.path / 'cwb'
 | 
			
		||||
        corpus_cwb_data_dir = corpus_cwb_dir / 'data'
 | 
			
		||||
        corpus_cwb_registry_dir = corpus_cwb_dir / 'registry'
 | 
			
		||||
        try:
 | 
			
		||||
            shutil.rmtree(corpus_cwb_dir, ignore_errors=True)
 | 
			
		||||
            corpus_cwb_dir.mkdir()
 | 
			
		||||
            corpus_cwb_data_dir.mkdir()
 | 
			
		||||
            corpus_cwb_registry_dir.mkdir()
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            current_app.logger.error(e)
 | 
			
		||||
            self.status = CorpusStatus.FAILED
 | 
			
		||||
            raise
 | 
			
		||||
        corpus_element = ET.fromstring('<corpus>\n</corpus>')
 | 
			
		||||
        for corpus_file in self.files:
 | 
			
		||||
            normalized_vrt_path = os.path.join(build_dir, f'{corpus_file.id}.norm.vrt')
 | 
			
		||||
            normalized_vrt_path = corpus_cwb_dir / f'{corpus_file.id}.norm.vrt'
 | 
			
		||||
            try:
 | 
			
		||||
                normalize_vrt_file(corpus_file.path, normalized_vrt_path)
 | 
			
		||||
            except:
 | 
			
		||||
@@ -152,7 +164,7 @@ class Corpus(HashidMixin, db.Model):
 | 
			
		||||
            # corpus_element.insert(1, text_element)
 | 
			
		||||
            corpus_element.append(text_element)
 | 
			
		||||
        ET.ElementTree(corpus_element).write(
 | 
			
		||||
            os.path.join(build_dir, 'corpus.vrt'),
 | 
			
		||||
            corpus_cwb_dir / 'corpus.vrt',
 | 
			
		||||
            encoding='utf-8'
 | 
			
		||||
        )
 | 
			
		||||
        self.status = CorpusStatus.SUBMITTED
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
from flask import current_app, url_for
 | 
			
		||||
from flask_hashids import HashidMixin
 | 
			
		||||
import os
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from app import db
 | 
			
		||||
from .corpus import CorpusStatus
 | 
			
		||||
from .file_mixin import FileMixin
 | 
			
		||||
@@ -45,8 +45,8 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
 | 
			
		||||
        return f'{self.corpus.jsonpatch_path}/files/{self.hashid}'
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def path(self):
 | 
			
		||||
        return os.path.join(self.corpus.path, 'files', str(self.id))
 | 
			
		||||
    def path(self) -> Path:
 | 
			
		||||
        return self.corpus.path / 'files' / f'{self.id}'
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def url(self):
 | 
			
		||||
@@ -66,9 +66,10 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
 | 
			
		||||
 | 
			
		||||
    def delete(self):
 | 
			
		||||
        try:
 | 
			
		||||
            os.remove(self.path)
 | 
			
		||||
            self.path.unlink(missing_ok=True)
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            current_app.logger.error(e)
 | 
			
		||||
            raise
 | 
			
		||||
        db.session.delete(self)
 | 
			
		||||
        self.corpus.status = CorpusStatus.UNPREPARED
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@ from flask import current_app, url_for
 | 
			
		||||
from flask_hashids import HashidMixin
 | 
			
		||||
from time import sleep
 | 
			
		||||
from typing import Union
 | 
			
		||||
import os
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
import shutil
 | 
			
		||||
from app import db
 | 
			
		||||
from app.ext.flask_sqlalchemy import ContainerColumn, IntEnumColumn
 | 
			
		||||
@@ -79,8 +79,8 @@ class Job(HashidMixin, db.Model):
 | 
			
		||||
        return f'{self.user.jsonpatch_path}/jobs/{self.hashid}'
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def path(self):
 | 
			
		||||
        return os.path.join(self.user.path, 'jobs', str(self.id))
 | 
			
		||||
    def path(self) -> Path:
 | 
			
		||||
        return self.user.path / 'jobs' / f'{self.id}'
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def url(self):
 | 
			
		||||
@@ -96,15 +96,19 @@ class Job(HashidMixin, db.Model):
 | 
			
		||||
        db.session.add(job)
 | 
			
		||||
        db.session.flush(objects=[job])
 | 
			
		||||
        db.session.refresh(job)
 | 
			
		||||
        job_inputs_dir = job.path / 'inputs'
 | 
			
		||||
        job_pipeline_data_dir = job.path / 'pipeline_data'
 | 
			
		||||
        job_results_dir = job.path / 'results'
 | 
			
		||||
        try:
 | 
			
		||||
            os.mkdir(job.path)
 | 
			
		||||
            os.mkdir(os.path.join(job.path, 'inputs'))
 | 
			
		||||
            os.mkdir(os.path.join(job.path, 'pipeline_data'))
 | 
			
		||||
            os.mkdir(os.path.join(job.path, 'results'))
 | 
			
		||||
            job.path.mkdir()
 | 
			
		||||
            job_inputs_dir.mkdir()
 | 
			
		||||
            job_pipeline_data_dir.mkdir()
 | 
			
		||||
            job_results_dir.mkdir()
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            # TODO: Potential leftover cleanup
 | 
			
		||||
            current_app.logger.error(e)
 | 
			
		||||
            db.session.rollback()
 | 
			
		||||
            raise e
 | 
			
		||||
            raise
 | 
			
		||||
        return job
 | 
			
		||||
 | 
			
		||||
    def delete(self):
 | 
			
		||||
@@ -131,8 +135,8 @@ class Job(HashidMixin, db.Model):
 | 
			
		||||
        ''' Restart a job - only if the status is failed '''
 | 
			
		||||
        if self.status != JobStatus.FAILED:
 | 
			
		||||
            raise Exception('Job status is not "failed"')
 | 
			
		||||
        shutil.rmtree(os.path.join(self.path, 'results'), ignore_errors=True)
 | 
			
		||||
        shutil.rmtree(os.path.join(self.path, 'pyflow.data'), ignore_errors=True)
 | 
			
		||||
        shutil.rmtree(self.path / 'results', ignore_errors=True)
 | 
			
		||||
        shutil.rmtree(self.path / 'pyflow.data', ignore_errors=True)
 | 
			
		||||
        for result in self.results:
 | 
			
		||||
            db.session.delete(result)
 | 
			
		||||
        self.end_date = None
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from flask_hashids import HashidMixin
 | 
			
		||||
import os
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from app import db
 | 
			
		||||
from .file_mixin import FileMixin
 | 
			
		||||
 | 
			
		||||
@@ -33,8 +33,8 @@ class JobInput(FileMixin, HashidMixin, db.Model):
 | 
			
		||||
        return f'{self.job.jsonpatch_path}/inputs/{self.hashid}'
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def path(self):
 | 
			
		||||
        return os.path.join(self.job.path, 'inputs', str(self.id))
 | 
			
		||||
    def path(self) -> Path:
 | 
			
		||||
        return self.job.path / 'inputs' / f'{self.id}'
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def url(self):
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from flask_hashids import HashidMixin
 | 
			
		||||
import os
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from app import db
 | 
			
		||||
from .file_mixin import FileMixin
 | 
			
		||||
 | 
			
		||||
@@ -35,8 +35,8 @@ class JobResult(FileMixin, HashidMixin, db.Model):
 | 
			
		||||
        return f'{self.job.jsonpatch_path}/results/{self.hashid}'
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def path(self):
 | 
			
		||||
        return os.path.join(self.job.path, 'results', str(self.id))
 | 
			
		||||
    def path(self) -> Path:
 | 
			
		||||
        return self.job.path / 'results' / f'{self.id}'
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def url(self):
 | 
			
		||||
 
 | 
			
		||||
@@ -1,8 +1,7 @@
 | 
			
		||||
from flask import abort, current_app, url_for
 | 
			
		||||
from flask import current_app, url_for
 | 
			
		||||
from flask_hashids import HashidMixin
 | 
			
		||||
from time import sleep
 | 
			
		||||
from tqdm import tqdm
 | 
			
		||||
import os
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
import requests
 | 
			
		||||
import yaml
 | 
			
		||||
from app import db
 | 
			
		||||
@@ -32,12 +31,8 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
 | 
			
		||||
    user = db.relationship('User', back_populates='spacy_nlp_pipeline_models')
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def path(self):
 | 
			
		||||
        return os.path.join(
 | 
			
		||||
            self.user.path,
 | 
			
		||||
            'spacy_nlp_pipeline_models',
 | 
			
		||||
            str(self.id)
 | 
			
		||||
        )
 | 
			
		||||
    def path(self) -> Path:
 | 
			
		||||
        return self.user.path / 'spacy_nlp_pipeline_models' / f'{self.id}'
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def jsonpatch_path(self):
 | 
			
		||||
@@ -57,14 +52,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def insert_defaults(force_download=False):
 | 
			
		||||
        nopaque_user = User.query.filter_by(username='nopaque').first()
 | 
			
		||||
        defaults_file = os.path.join(
 | 
			
		||||
            os.path.dirname(os.path.abspath(__file__)),
 | 
			
		||||
            'default_records',
 | 
			
		||||
            'spacy_nlp_pipeline_model.yml'
 | 
			
		||||
        )
 | 
			
		||||
        with open(defaults_file, 'r') as f:
 | 
			
		||||
            defaults = yaml.safe_load(f)
 | 
			
		||||
        for m in defaults:
 | 
			
		||||
        default_records_file = Path(__file__).parent / 'default_records' / 'spacy_nlp_pipeline_model.yml'
 | 
			
		||||
        with default_records_file.open('r') as f:
 | 
			
		||||
            default_records = yaml.safe_load(f)
 | 
			
		||||
        for m in default_records:
 | 
			
		||||
            model = SpaCyNLPPipelineModel.query.filter_by(title=m['title'], version=m['version']).first()  # noqa
 | 
			
		||||
            if model is not None:
 | 
			
		||||
                model.compatible_service_versions = m['compatible_service_versions']
 | 
			
		||||
@@ -96,7 +87,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
 | 
			
		||||
                db.session.add(model)
 | 
			
		||||
                db.session.flush(objects=[model])
 | 
			
		||||
                db.session.refresh(model)
 | 
			
		||||
            if not os.path.exists(model.path) or force_download:
 | 
			
		||||
            if not model.path.exists() or force_download:
 | 
			
		||||
                r = requests.get(m['url'], stream=True)
 | 
			
		||||
                pbar = tqdm(
 | 
			
		||||
                    desc=f'{model.title} ({model.filename})',
 | 
			
		||||
@@ -116,9 +107,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
 | 
			
		||||
    
 | 
			
		||||
    def delete(self):
 | 
			
		||||
        try:
 | 
			
		||||
            os.remove(self.path)
 | 
			
		||||
            self.path.unlink(missing_ok=True)
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            current_app.logger.error(e)
 | 
			
		||||
            raise
 | 
			
		||||
        db.session.delete(self)
 | 
			
		||||
 | 
			
		||||
    def to_json_serializeable(self, backrefs=False, relationships=False):
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
from flask import current_app, url_for
 | 
			
		||||
from flask_hashids import HashidMixin
 | 
			
		||||
from tqdm import tqdm
 | 
			
		||||
import os
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
import requests
 | 
			
		||||
import yaml
 | 
			
		||||
from app import db
 | 
			
		||||
@@ -30,12 +30,8 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
 | 
			
		||||
    user = db.relationship('User', back_populates='tesseract_ocr_pipeline_models')
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def path(self):
 | 
			
		||||
        return os.path.join(
 | 
			
		||||
            self.user.path,
 | 
			
		||||
            'tesseract_ocr_pipeline_models',
 | 
			
		||||
            str(self.id)
 | 
			
		||||
        )
 | 
			
		||||
    def path(self) -> Path:
 | 
			
		||||
        return self.user.path / 'tesseract_ocr_pipeline_models' / f'{self.id}'
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def jsonpatch_path(self):
 | 
			
		||||
@@ -55,14 +51,10 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def insert_defaults(force_download=False):
 | 
			
		||||
        nopaque_user = User.query.filter_by(username='nopaque').first()
 | 
			
		||||
        defaults_file = os.path.join(
 | 
			
		||||
            os.path.dirname(os.path.abspath(__file__)),
 | 
			
		||||
            'default_records',
 | 
			
		||||
            'tesseract_ocr_pipeline_model.yml'
 | 
			
		||||
        )
 | 
			
		||||
        with open(defaults_file, 'r') as f:
 | 
			
		||||
            defaults = yaml.safe_load(f)
 | 
			
		||||
        for m in defaults:
 | 
			
		||||
        default_records_file = Path(__file__).parent / 'default_records' / 'tesseract_ocr_pipeline_model.yml'
 | 
			
		||||
        with default_records_file.open('r') as f:
 | 
			
		||||
            default_records = yaml.safe_load(f)
 | 
			
		||||
        for m in default_records:
 | 
			
		||||
            model = TesseractOCRPipelineModel.query.filter_by(title=m['title'], version=m['version']).first()  # noqa
 | 
			
		||||
            if model is not None:
 | 
			
		||||
                model.compatible_service_versions = m['compatible_service_versions']
 | 
			
		||||
@@ -92,7 +84,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
 | 
			
		||||
                db.session.flush(objects=[model])
 | 
			
		||||
                db.session.refresh(model)
 | 
			
		||||
                model.filename = f'{model.id}.traineddata'
 | 
			
		||||
            if not os.path.exists(model.path) or force_download:
 | 
			
		||||
            if not model.path.exists() or force_download:
 | 
			
		||||
                r = requests.get(m['url'], stream=True)
 | 
			
		||||
                pbar = tqdm(
 | 
			
		||||
                    desc=f'{model.title} ({model.filename})',
 | 
			
		||||
@@ -112,9 +104,10 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
 | 
			
		||||
 | 
			
		||||
    def delete(self):
 | 
			
		||||
        try:
 | 
			
		||||
            os.remove(self.path)
 | 
			
		||||
            self.path.unlink(missing_ok=True)
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            current_app.logger.error(e)
 | 
			
		||||
            raise
 | 
			
		||||
        db.session.delete(self)
 | 
			
		||||
 | 
			
		||||
    def to_json_serializeable(self, backrefs=False, relationships=False):
 | 
			
		||||
 
 | 
			
		||||
@@ -4,14 +4,14 @@ from flask import current_app, url_for
 | 
			
		||||
from flask_hashids import HashidMixin
 | 
			
		||||
from flask_login import UserMixin
 | 
			
		||||
from sqlalchemy.ext.associationproxy import association_proxy
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from typing import Union
 | 
			
		||||
from werkzeug.security import generate_password_hash, check_password_hash
 | 
			
		||||
import jwt
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
import secrets
 | 
			
		||||
import shutil
 | 
			
		||||
from app import db, hashids, login
 | 
			
		||||
from app import db, hashids
 | 
			
		||||
from app.ext.flask_sqlalchemy import IntEnumColumn
 | 
			
		||||
from .corpus import Corpus
 | 
			
		||||
from .corpus_follower_association import CorpusFollowerAssociation
 | 
			
		||||
@@ -145,9 +145,8 @@ class User(HashidMixin, UserMixin, db.Model):
 | 
			
		||||
        self.password_hash = generate_password_hash(password)
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def path(self):
 | 
			
		||||
        return os.path.join(
 | 
			
		||||
            current_app.config.get('NOPAQUE_DATA_DIR'), 'users', str(self.id))
 | 
			
		||||
    def path(self) -> Path:
 | 
			
		||||
        return current_app.config.get('NOPAQUE_DATA_DIR') / 'users' / f'{self.id}'
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def create(**kwargs):
 | 
			
		||||
@@ -155,16 +154,21 @@ class User(HashidMixin, UserMixin, db.Model):
 | 
			
		||||
        db.session.add(user)
 | 
			
		||||
        db.session.flush(objects=[user])
 | 
			
		||||
        db.session.refresh(user)
 | 
			
		||||
        user_spacy_nlp_pipeline_models_dir = user.path / 'spacy_nlp_pipeline_models'
 | 
			
		||||
        user_tesseract_ocr_pipeline_models_dir = user.path / 'tesseract_ocr_pipeline_models'
 | 
			
		||||
        user_corpora_dir = user.path / 'corpora'
 | 
			
		||||
        user_jobs_dir = user.path / 'jobs'
 | 
			
		||||
        try:
 | 
			
		||||
            os.mkdir(user.path)
 | 
			
		||||
            os.mkdir(os.path.join(user.path, 'spacy_nlp_pipeline_models'))
 | 
			
		||||
            os.mkdir(os.path.join(user.path, 'tesseract_ocr_pipeline_models'))
 | 
			
		||||
            os.mkdir(os.path.join(user.path, 'corpora'))
 | 
			
		||||
            os.mkdir(os.path.join(user.path, 'jobs'))
 | 
			
		||||
            user.path.mkdir()
 | 
			
		||||
            user_spacy_nlp_pipeline_models_dir.mkdir()
 | 
			
		||||
            user_tesseract_ocr_pipeline_models_dir.mkdir()
 | 
			
		||||
            user_corpora_dir.mkdir()
 | 
			
		||||
            user_jobs_dir.mkdir()
 | 
			
		||||
        except OSError as e:
 | 
			
		||||
            # TODO: Potential leftover cleanup
 | 
			
		||||
            current_app.logger.error(e)
 | 
			
		||||
            db.session.rollback()
 | 
			
		||||
            raise e
 | 
			
		||||
            raise
 | 
			
		||||
        return user
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
 
 | 
			
		||||
@@ -1,12 +1,11 @@
 | 
			
		||||
from flask import Blueprint
 | 
			
		||||
from flask_login import login_required
 | 
			
		||||
import os
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
import yaml
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
services_file = \
 | 
			
		||||
    os.path.join(os.path.dirname(os.path.abspath(__file__)), 'services.yml')
 | 
			
		||||
with open(services_file, 'r') as f:
 | 
			
		||||
services_file = Path(__file__).parent / 'services.yml'
 | 
			
		||||
with services_file.open('r') as f:
 | 
			
		||||
    SERVICES = yaml.safe_load(f)
 | 
			
		||||
 | 
			
		||||
bp = Blueprint('services', __name__)
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,4 @@
 | 
			
		||||
from app.models import User
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
from app import db
 | 
			
		||||
from . import bp
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,6 @@ from flask import (
 | 
			
		||||
)
 | 
			
		||||
from flask_breadcrumbs import register_breadcrumb
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
import os
 | 
			
		||||
from app.models import User
 | 
			
		||||
from . import bp
 | 
			
		||||
from .utils import user_dynamic_list_constructor as user_dlc
 | 
			
		||||
@@ -40,8 +39,8 @@ def user_avatar(user_id):
 | 
			
		||||
    if user.avatar is None:
 | 
			
		||||
        return redirect(url_for('static', filename='images/user_avatar.png'))
 | 
			
		||||
    return send_from_directory(
 | 
			
		||||
        os.path.dirname(user.avatar.path),
 | 
			
		||||
        os.path.basename(user.avatar.path),
 | 
			
		||||
        user.avatar.path.parent,
 | 
			
		||||
        user.avatar.path.name,
 | 
			
		||||
        as_attachment=True,
 | 
			
		||||
        attachment_filename=user.avatar.filename,
 | 
			
		||||
        mimetype=user.avatar.mimetype
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,7 @@
 | 
			
		||||
from dotenv import load_dotenv
 | 
			
		||||
from flask import Flask
 | 
			
		||||
from logging.handlers import RotatingFileHandler
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from werkzeug.middleware.proxy_fix import ProxyFix
 | 
			
		||||
import logging
 | 
			
		||||
import os
 | 
			
		||||
@@ -57,8 +58,7 @@ class Config:
 | 
			
		||||
 | 
			
		||||
    ''' # nopaque # '''
 | 
			
		||||
    NOPAQUE_ADMIN = os.environ.get('NOPAQUE_ADMIN')
 | 
			
		||||
    NOPAQUE_DATA_DIR = \
 | 
			
		||||
        os.path.abspath(os.environ.get('NOPAQUE_DATA_PATH', '/mnt/nopaque'))
 | 
			
		||||
    NOPAQUE_DATA_DIR = Path(os.environ.get('NOPAQUE_DATA_PATH', '/mnt/nopaque'))
 | 
			
		||||
    NOPAQUE_IS_PRIMARY_INSTANCE = \
 | 
			
		||||
        os.environ.get('NOPAQUE_IS_PRIMARY_INSTANCE', 'true').lower() == 'true'
 | 
			
		||||
    NOPAQUE_MAIL_SUBJECT_PREFIX = '[nopaque]'
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user