From 60a59383c7301665e20887a5751c1dc6271af754 Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Mon, 15 May 2023 12:00:13 +0200
Subject: [PATCH] A better application structure
---
app/__init__.py | 16 ++-
app/cli/__init__.py | 10 --
app/cli/converter.py | 21 ----
app/cli/corpus.py | 23 ----
app/cli/main.py | 45 --------
app/contributions/__init__.py | 10 +-
app/converters/cli.py | 22 ++++
app/converters/sandpaper.py | 191 +++++++++++++++++-----------------
app/corpora/__init__.py | 5 +-
app/corpora/cli.py | 21 ++++
app/errors/__init__.py | 7 +-
app/errors/handlers.py | 17 +--
app/main/__init__.py | 2 +-
app/main/cli.py | 45 ++++++++
app/tests/__init__.py | 5 -
app/users/__init__.py | 3 +-
16 files changed, 215 insertions(+), 228 deletions(-)
delete mode 100644 app/cli/__init__.py
delete mode 100644 app/cli/converter.py
delete mode 100644 app/cli/corpus.py
delete mode 100644 app/cli/main.py
create mode 100644 app/converters/cli.py
create mode 100644 app/corpora/cli.py
create mode 100644 app/main/cli.py
diff --git a/app/__init__.py b/app/__init__.py
index 3afa99af..3a03e00b 100644
--- a/app/__init__.py
+++ b/app/__init__.py
@@ -13,6 +13,7 @@ from flask_paranoid import Paranoid
from flask_socketio import SocketIO
from flask_sqlalchemy import SQLAlchemy
from flask_hashids import Hashids
+from werkzeug.exceptions import HTTPException
apifairy = APIFairy()
@@ -35,7 +36,7 @@ socketio = SocketIO()
def create_app(config: Config = Config) -> Flask:
''' Creates an initialized Flask (WSGI Application) object. '''
- app: Flask = Flask(__name__)
+ app = Flask(__name__)
app.config.from_object(config)
config.init_app(app)
docker_client.login(
@@ -57,12 +58,6 @@ def create_app(config: Config = Config) -> Flask:
scheduler.init_app(app)
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa
- from .errors import init_app as init_error_handlers
- init_error_handlers(app)
-
- from .cli import init_app as init_cli
- init_cli(app)
-
from .admin import bp as admin_blueprint
default_breadcrumb_root(admin_blueprint, '.admin')
app.register_blueprint(admin_blueprint, url_prefix='/admin')
@@ -80,7 +75,10 @@ def create_app(config: Config = Config) -> Flask:
from .corpora import bp as corpora_blueprint
default_breadcrumb_root(corpora_blueprint, '.corpora')
- app.register_blueprint(corpora_blueprint, url_prefix='/corpora')
+ app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
+
+ from .errors import bp as errors_bp
+ app.register_blueprint(errors_bp)
from .jobs import bp as jobs_blueprint
default_breadcrumb_root(jobs_blueprint, '.jobs')
@@ -88,7 +86,7 @@ def create_app(config: Config = Config) -> Flask:
from .main import bp as main_blueprint
default_breadcrumb_root(main_blueprint, '.')
- app.register_blueprint(main_blueprint)
+ app.register_blueprint(main_blueprint, cli_group=None)
from .services import bp as services_blueprint
default_breadcrumb_root(services_blueprint, '.services')
diff --git a/app/cli/__init__.py b/app/cli/__init__.py
deleted file mode 100644
index 1803deea..00000000
--- a/app/cli/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from .converter import init_app as converter_init_app
-from .corpus import init_app as corpus_init_app
-from .main import init_app as main_init_app
-
-
-
-def init_app(app):
- converter_init_app(app)
- corpus_init_app(app)
- main_init_app(app)
diff --git a/app/cli/converter.py b/app/cli/converter.py
deleted file mode 100644
index 4d07bc30..00000000
--- a/app/cli/converter.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import click
-
-
-def init_app(app):
- @app.cli.group('converter')
- def converter():
- ''' Converter commands. '''
- pass
-
- @converter.group('sandpaper')
- def sandpaper_converter():
- ''' Sandpaper converter commands. '''
- pass
-
- @sandpaper_converter.command('run')
- @click.argument('json_db')
- @click.argument('data_dir')
- def run_sandpaper_converter(json_db, data_dir):
- ''' Run the sandpaper converter. '''
- from app.converters.sandpaper import convert
- convert(json_db, data_dir)
diff --git a/app/cli/corpus.py b/app/cli/corpus.py
deleted file mode 100644
index e79269f0..00000000
--- a/app/cli/corpus.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from app.models import Corpus, CorpusStatus
-
-
-def init_app(app):
- @app.cli.group('corpus')
- def corpus():
- ''' Corpus commands. '''
- pass
-
- @corpus.command('dismantle')
- def dismantle():
- ''' Dismantle built corpora. '''
- status = [
- CorpusStatus.QUEUED,
- CorpusStatus.BUILDING,
- CorpusStatus.BUILT,
- CorpusStatus.STARTING_ANALYSIS_SESSION,
- CorpusStatus.RUNNING_ANALYSIS_SESSION,
- CorpusStatus.CANCELING_ANALYSIS_SESSION
- ]
- for corpus in [x for x in Corpus.query.all() if x.status in status]:
- corpus.status = CorpusStatus.SUBMITTED
- corpus.num_analysis_sessions = 0
diff --git a/app/cli/main.py b/app/cli/main.py
deleted file mode 100644
index 2022d609..00000000
--- a/app/cli/main.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from flask import current_app
-from flask_migrate import upgrade
-import os
-from app.models import (
- CorpusFollowerRole,
- Role,
- SpaCyNLPPipelineModel,
- TesseractOCRPipelineModel,
- User
-)
-
-
-def init_app(app):
- @app.cli.command('deploy')
- def deploy():
- ''' Run deployment tasks. '''
- # Make default directories
- print('Make default directories')
- base_dir = current_app.config['NOPAQUE_DATA_DIR']
- default_dirs = [
- os.path.join(base_dir, 'tmp'),
- os.path.join(base_dir, 'users')
- ]
- for dir in default_dirs:
- if os.path.exists(dir):
- if not os.path.isdir(dir):
- raise NotADirectoryError(f'{dir} is not a directory')
- else:
- os.mkdir(dir)
-
- # migrate database to latest revision
- print('Migrate database to latest revision')
- upgrade()
-
- # Insert/Update default database values
- print('Insert/Update default Roles')
- Role.insert_defaults()
- print('Insert/Update default Users')
- User.insert_defaults()
- print('Insert/Update default CorpusFollowerRoles')
- CorpusFollowerRole.insert_defaults()
- print('Insert/Update default SpaCyNLPPipelineModels')
- SpaCyNLPPipelineModel.insert_defaults()
- print('Insert/Update default TesseractOCRPipelineModels')
- TesseractOCRPipelineModel.insert_defaults()
diff --git a/app/contributions/__init__.py b/app/contributions/__init__.py
index 5a7ddf1b..3805e489 100644
--- a/app/contributions/__init__.py
+++ b/app/contributions/__init__.py
@@ -15,7 +15,9 @@ def before_request():
pass
-from . import routes
-from . import spacy_nlp_pipeline_models
-from . import tesseract_ocr_pipeline_models
-from . import transkribus_htr_pipeline_models
+from . import (
+ routes,
+ spacy_nlp_pipeline_models,
+ tesseract_ocr_pipeline_models,
+ transkribus_htr_pipeline_models
+)
diff --git a/app/converters/cli.py b/app/converters/cli.py
new file mode 100644
index 00000000..a7baf465
--- /dev/null
+++ b/app/converters/cli.py
@@ -0,0 +1,22 @@
+import click
+from . import bp
+from .sandpaper import SandpaperConverter
+
+
+@bp.cli.group('converter')
+def converter():
+ ''' Converter commands. '''
+ pass
+
+@converter.group('sandpaper')
+def sandpaper_converter():
+ ''' Sandpaper converter commands. '''
+ pass
+
+@sandpaper_converter.command('run')
+@click.argument('json_db_file')
+@click.argument('data_dir')
+def run_sandpaper_converter(json_db_file, data_dir):
+ ''' Run the sandpaper converter. '''
+ sandpaper_converter = SandpaperConverter(json_db_file, data_dir)
+ sandpaper_converter.run()
diff --git a/app/converters/sandpaper.py b/app/converters/sandpaper.py
index 2ea61d98..27f2bcc6 100644
--- a/app/converters/sandpaper.py
+++ b/app/converters/sandpaper.py
@@ -7,101 +7,106 @@ import os
import shutil
-def convert(json_db_file, data_dir):
- with open(json_db_file, 'r') as f:
- json_db = json.loads(f.read())
+class SandpaperConverter:
+ def __init__(self, json_db_file, data_dir):
+ self.json_db_file = json_db_file
+ self.data_dir = data_dir
- for json_user in json_db:
- if not json_user['confirmed']:
- current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
- continue
- user_dir = os.path.join(data_dir, str(json_user['id']))
- convert_user(json_user, user_dir)
- db.session.commit()
+ def run(self):
+ with open(self.json_db_file, 'r') as f:
+ json_db = json.loads(f.read())
+
+ for json_user in json_db:
+ if not json_user['confirmed']:
+ current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
+ continue
+ user_dir = os.path.join(self.data_dir, str(json_user['id']))
+ self.convert_user(json_user, user_dir)
+ db.session.commit()
-def convert_user(json_user, user_dir):
- current_app.logger.info(f'Create User {json_user["username"]}...')
- user = User(
- confirmed=json_user['confirmed'],
- email=json_user['email'],
- last_seen=datetime.fromtimestamp(json_user['last_seen']),
- member_since=datetime.fromtimestamp(json_user['member_since']),
- password_hash=json_user['password_hash'], # TODO: Needs to be added manually
- username=json_user['username']
- )
- db.session.add(user)
- db.session.flush(objects=[user])
- db.session.refresh(user)
- try:
- user.makedirs()
- except OSError as e:
- current_app.logger.error(e)
- db.session.rollback()
- raise Exception('Internal Server Error')
- for json_corpus in json_user['corpora'].values():
- if not json_corpus['files'].values():
- current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
- continue
- corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
- convert_corpus(json_corpus, user, corpus_dir)
- current_app.logger.info('Done')
-
-
-def convert_corpus(json_corpus, user, corpus_dir):
- current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
- corpus = Corpus(
- user=user,
- creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
- description=json_corpus['description'],
- title=json_corpus['title']
- )
- db.session.add(corpus)
- db.session.flush(objects=[corpus])
- db.session.refresh(corpus)
- try:
- corpus.makedirs()
- except OSError as e:
- current_app.logger.error(e)
- db.session.rollback()
- raise Exception('Internal Server Error')
- for json_corpus_file in json_corpus['files'].values():
- convert_corpus_file(json_corpus_file, corpus, corpus_dir)
- current_app.logger.info('Done')
-
-
-def convert_corpus_file(json_corpus_file, corpus, corpus_dir):
- current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
- corpus_file = CorpusFile(
- corpus=corpus,
- address=json_corpus_file['address'],
- author=json_corpus_file['author'],
- booktitle=json_corpus_file['booktitle'],
- chapter=json_corpus_file['chapter'],
- editor=json_corpus_file['editor'],
- filename=json_corpus_file['filename'],
- institution=json_corpus_file['institution'],
- journal=json_corpus_file['journal'],
- mimetype='application/vrt+xml',
- pages=json_corpus_file['pages'],
- publisher=json_corpus_file['publisher'],
- publishing_year=json_corpus_file['publishing_year'],
- school=json_corpus_file['school'],
- title=json_corpus_file['title']
- )
- db.session.add(corpus_file)
- db.session.flush(objects=[corpus_file])
- db.session.refresh(corpus_file)
- try:
- shutil.copy2(
- os.path.join(corpus_dir, json_corpus_file['filename']),
- corpus_file.path
+ def convert_user(self, json_user, user_dir):
+ current_app.logger.info(f'Create User {json_user["username"]}...')
+ user = User(
+ confirmed=json_user['confirmed'],
+ email=json_user['email'],
+ last_seen=datetime.fromtimestamp(json_user['last_seen']),
+ member_since=datetime.fromtimestamp(json_user['member_since']),
+ password_hash=json_user['password_hash'], # TODO: Needs to be added manually
+ username=json_user['username']
)
- except:
- current_app.logger.warning(
- 'Can not convert corpus file: '
- f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
- ' -> '
- f'{corpus_file.path}'
+ db.session.add(user)
+ db.session.flush(objects=[user])
+ db.session.refresh(user)
+ try:
+ user.makedirs()
+ except OSError as e:
+ current_app.logger.error(e)
+ db.session.rollback()
+ raise Exception('Internal Server Error')
+ for json_corpus in json_user['corpora'].values():
+ if not json_corpus['files'].values():
+ current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
+ continue
+ corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
+ self.convert_corpus(json_corpus, user, corpus_dir)
+ current_app.logger.info('Done')
+
+
+ def convert_corpus(self, json_corpus, user, corpus_dir):
+ current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
+ corpus = Corpus(
+ user=user,
+ creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
+ description=json_corpus['description'],
+ title=json_corpus['title']
)
- current_app.logger.info('Done')
+ db.session.add(corpus)
+ db.session.flush(objects=[corpus])
+ db.session.refresh(corpus)
+ try:
+ corpus.makedirs()
+ except OSError as e:
+ current_app.logger.error(e)
+ db.session.rollback()
+ raise Exception('Internal Server Error')
+ for json_corpus_file in json_corpus['files'].values():
+ self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
+ current_app.logger.info('Done')
+
+
+ def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
+ current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
+ corpus_file = CorpusFile(
+ corpus=corpus,
+ address=json_corpus_file['address'],
+ author=json_corpus_file['author'],
+ booktitle=json_corpus_file['booktitle'],
+ chapter=json_corpus_file['chapter'],
+ editor=json_corpus_file['editor'],
+ filename=json_corpus_file['filename'],
+ institution=json_corpus_file['institution'],
+ journal=json_corpus_file['journal'],
+ mimetype='application/vrt+xml',
+ pages=json_corpus_file['pages'],
+ publisher=json_corpus_file['publisher'],
+ publishing_year=json_corpus_file['publishing_year'],
+ school=json_corpus_file['school'],
+ title=json_corpus_file['title']
+ )
+ db.session.add(corpus_file)
+ db.session.flush(objects=[corpus_file])
+ db.session.refresh(corpus_file)
+ try:
+ shutil.copy2(
+ os.path.join(corpus_dir, json_corpus_file['filename']),
+ corpus_file.path
+ )
+ except:
+ current_app.logger.warning(
+ 'Can not convert corpus file: '
+ f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
+ ' -> '
+ f'{corpus_file.path}'
+ )
+ current_app.logger.info('Done')
diff --git a/app/corpora/__init__.py b/app/corpora/__init__.py
index 3766f2a6..34663b69 100644
--- a/app/corpora/__init__.py
+++ b/app/corpora/__init__.py
@@ -3,6 +3,7 @@ from flask_login import login_required
bp = Blueprint('corpora', __name__)
+bp.cli.short_help = 'Corpus commands.'
@bp.before_request
@@ -15,6 +16,4 @@ def before_request():
pass
-from . import cqi_over_socketio, routes, json_routes
-from . import files
-from . import followers
+from . import cli, cqi_over_socketio, files, followers, routes, json_routes
diff --git a/app/corpora/cli.py b/app/corpora/cli.py
new file mode 100644
index 00000000..d21e8289
--- /dev/null
+++ b/app/corpora/cli.py
@@ -0,0 +1,21 @@
+from app.models import Corpus, CorpusStatus
+import os
+import shutil
+from . import bp
+
+
+@bp.cli.command('reset')
+def reset():
+ ''' Reset built corpora. '''
+ status = [
+ CorpusStatus.QUEUED,
+ CorpusStatus.BUILDING,
+ CorpusStatus.BUILT,
+ CorpusStatus.STARTING_ANALYSIS_SESSION,
+ CorpusStatus.RUNNING_ANALYSIS_SESSION,
+ CorpusStatus.CANCELING_ANALYSIS_SESSION
+ ]
+ for corpus in [x for x in Corpus.query.all() if x.status in status]:
+ shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True)
+ corpus.status = CorpusStatus.SUBMITTED
+ corpus.num_analysis_sessions = 0
diff --git a/app/errors/__init__.py b/app/errors/__init__.py
index 847658fb..0d79af48 100644
--- a/app/errors/__init__.py
+++ b/app/errors/__init__.py
@@ -1,6 +1,5 @@
-from werkzeug.exceptions import HTTPException
-from .handlers import generic
+from flask import Blueprint
-def init_app(app):
- app.register_error_handler(HTTPException, generic)
+bp = Blueprint('errors', __name__)
+from . import handlers
diff --git a/app/errors/handlers.py b/app/errors/handlers.py
index fe7aaf4f..a18979ab 100644
--- a/app/errors/handlers.py
+++ b/app/errors/handlers.py
@@ -1,13 +1,14 @@
-from flask import jsonify, render_template, request, Response
+from flask import jsonify, render_template, request
from werkzeug.exceptions import HTTPException
-from typing import Tuple, Union
+from . import bp
-def generic(error: HTTPException) -> Tuple[Union[str, Response], int]:
- ''' Generic error handler '''
- accent_json: bool = request.accept_mimetypes.accept_json
- accept_html: bool = request.accept_mimetypes.accept_html
- if accent_json and not accept_html:
- response: Response = jsonify(str(error))
+@bp.app_errorhandler(HTTPException)
+def handle_http_exception(error):
+ ''' Generic HTTP exception handler '''
+ accept_json = request.accept_mimetypes.accept_json
+ accept_html = request.accept_mimetypes.accept_html
+ if accept_json and not accept_html:
+ response = jsonify(str(error))
return response, error.code
return render_template('errors/error.html.j2', error=error), error.code
diff --git a/app/main/__init__.py b/app/main/__init__.py
index f32fed5f..c9586fca 100644
--- a/app/main/__init__.py
+++ b/app/main/__init__.py
@@ -2,4 +2,4 @@ from flask import Blueprint
bp = Blueprint('main', __name__, cli_group=None)
-from . import routes
+from . import cli, routes
diff --git a/app/main/cli.py b/app/main/cli.py
new file mode 100644
index 00000000..0284bb88
--- /dev/null
+++ b/app/main/cli.py
@@ -0,0 +1,45 @@
+from flask import current_app
+from flask_migrate import upgrade
+import os
+from app.models import (
+ CorpusFollowerRole,
+ Role,
+ SpaCyNLPPipelineModel,
+ TesseractOCRPipelineModel,
+ User
+)
+from . import bp
+
+
+@bp.cli.command('deploy')
+def deploy():
+ ''' Run deployment tasks. '''
+ # Make default directories
+ print('Make default directories')
+ base_dir = current_app.config['NOPAQUE_DATA_DIR']
+ default_dirs = [
+ os.path.join(base_dir, 'tmp'),
+ os.path.join(base_dir, 'users')
+ ]
+ for dir in default_dirs:
+ if os.path.exists(dir):
+ if not os.path.isdir(dir):
+ raise NotADirectoryError(f'{dir} is not a directory')
+ else:
+ os.mkdir(dir)
+
+ # migrate database to latest revision
+ print('Migrate database to latest revision')
+ upgrade()
+
+ # Insert/Update default database values
+ print('Insert/Update default Roles')
+ Role.insert_defaults()
+ print('Insert/Update default Users')
+ User.insert_defaults()
+ print('Insert/Update default CorpusFollowerRoles')
+ CorpusFollowerRole.insert_defaults()
+ print('Insert/Update default SpaCyNLPPipelineModels')
+ SpaCyNLPPipelineModel.insert_defaults()
+ print('Insert/Update default TesseractOCRPipelineModels')
+ TesseractOCRPipelineModel.insert_defaults()
diff --git a/app/tests/__init__.py b/app/tests/__init__.py
index 4665c05d..e69de29b 100644
--- a/app/tests/__init__.py
+++ b/app/tests/__init__.py
@@ -1,5 +0,0 @@
-from flask import Blueprint
-
-
-bp = Blueprint('tests', __name__)
-from . import cli
diff --git a/app/users/__init__.py b/app/users/__init__.py
index 8584a56f..b3492a37 100644
--- a/app/users/__init__.py
+++ b/app/users/__init__.py
@@ -15,5 +15,4 @@ def before_request():
pass
-from . import events, json_routes, routes
-from . import settings
+from . import events, json_routes, routes, settings