diff --git a/app/__init__.py b/app/__init__.py index 3afa99af..3a03e00b 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -13,6 +13,7 @@ from flask_paranoid import Paranoid from flask_socketio import SocketIO from flask_sqlalchemy import SQLAlchemy from flask_hashids import Hashids +from werkzeug.exceptions import HTTPException apifairy = APIFairy() @@ -35,7 +36,7 @@ socketio = SocketIO() def create_app(config: Config = Config) -> Flask: ''' Creates an initialized Flask (WSGI Application) object. ''' - app: Flask = Flask(__name__) + app = Flask(__name__) app.config.from_object(config) config.init_app(app) docker_client.login( @@ -57,12 +58,6 @@ def create_app(config: Config = Config) -> Flask: scheduler.init_app(app) socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa - from .errors import init_app as init_error_handlers - init_error_handlers(app) - - from .cli import init_app as init_cli - init_cli(app) - from .admin import bp as admin_blueprint default_breadcrumb_root(admin_blueprint, '.admin') app.register_blueprint(admin_blueprint, url_prefix='/admin') @@ -80,7 +75,10 @@ def create_app(config: Config = Config) -> Flask: from .corpora import bp as corpora_blueprint default_breadcrumb_root(corpora_blueprint, '.corpora') - app.register_blueprint(corpora_blueprint, url_prefix='/corpora') + app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora') + + from .errors import bp as errors_bp + app.register_blueprint(errors_bp) from .jobs import bp as jobs_blueprint default_breadcrumb_root(jobs_blueprint, '.jobs') @@ -88,7 +86,7 @@ def create_app(config: Config = Config) -> Flask: from .main import bp as main_blueprint default_breadcrumb_root(main_blueprint, '.') - app.register_blueprint(main_blueprint) + app.register_blueprint(main_blueprint, cli_group=None) from .services import bp as services_blueprint default_breadcrumb_root(services_blueprint, '.services') diff --git a/app/cli/__init__.py b/app/cli/__init__.py deleted file mode 100644 index 1803deea..00000000 --- a/app/cli/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .converter import init_app as converter_init_app -from .corpus import init_app as corpus_init_app -from .main import init_app as main_init_app - - - -def init_app(app): - converter_init_app(app) - corpus_init_app(app) - main_init_app(app) diff --git a/app/cli/converter.py b/app/cli/converter.py deleted file mode 100644 index 4d07bc30..00000000 --- a/app/cli/converter.py +++ /dev/null @@ -1,21 +0,0 @@ -import click - - -def init_app(app): - @app.cli.group('converter') - def converter(): - ''' Converter commands. ''' - pass - - @converter.group('sandpaper') - def sandpaper_converter(): - ''' Sandpaper converter commands. ''' - pass - - @sandpaper_converter.command('run') - @click.argument('json_db') - @click.argument('data_dir') - def run_sandpaper_converter(json_db, data_dir): - ''' Run the sandpaper converter. ''' - from app.converters.sandpaper import convert - convert(json_db, data_dir) diff --git a/app/cli/corpus.py b/app/cli/corpus.py deleted file mode 100644 index e79269f0..00000000 --- a/app/cli/corpus.py +++ /dev/null @@ -1,23 +0,0 @@ -from app.models import Corpus, CorpusStatus - - -def init_app(app): - @app.cli.group('corpus') - def corpus(): - ''' Corpus commands. ''' - pass - - @corpus.command('dismantle') - def dismantle(): - ''' Dismantle built corpora. ''' - status = [ - CorpusStatus.QUEUED, - CorpusStatus.BUILDING, - CorpusStatus.BUILT, - CorpusStatus.STARTING_ANALYSIS_SESSION, - CorpusStatus.RUNNING_ANALYSIS_SESSION, - CorpusStatus.CANCELING_ANALYSIS_SESSION - ] - for corpus in [x for x in Corpus.query.all() if x.status in status]: - corpus.status = CorpusStatus.SUBMITTED - corpus.num_analysis_sessions = 0 diff --git a/app/cli/main.py b/app/cli/main.py deleted file mode 100644 index 2022d609..00000000 --- a/app/cli/main.py +++ /dev/null @@ -1,45 +0,0 @@ -from flask import current_app -from flask_migrate import upgrade -import os -from app.models import ( - CorpusFollowerRole, - Role, - SpaCyNLPPipelineModel, - TesseractOCRPipelineModel, - User -) - - -def init_app(app): - @app.cli.command('deploy') - def deploy(): - ''' Run deployment tasks. ''' - # Make default directories - print('Make default directories') - base_dir = current_app.config['NOPAQUE_DATA_DIR'] - default_dirs = [ - os.path.join(base_dir, 'tmp'), - os.path.join(base_dir, 'users') - ] - for dir in default_dirs: - if os.path.exists(dir): - if not os.path.isdir(dir): - raise NotADirectoryError(f'{dir} is not a directory') - else: - os.mkdir(dir) - - # migrate database to latest revision - print('Migrate database to latest revision') - upgrade() - - # Insert/Update default database values - print('Insert/Update default Roles') - Role.insert_defaults() - print('Insert/Update default Users') - User.insert_defaults() - print('Insert/Update default CorpusFollowerRoles') - CorpusFollowerRole.insert_defaults() - print('Insert/Update default SpaCyNLPPipelineModels') - SpaCyNLPPipelineModel.insert_defaults() - print('Insert/Update default TesseractOCRPipelineModels') - TesseractOCRPipelineModel.insert_defaults() diff --git a/app/contributions/__init__.py b/app/contributions/__init__.py index 5a7ddf1b..3805e489 100644 --- a/app/contributions/__init__.py +++ b/app/contributions/__init__.py @@ -15,7 +15,9 @@ def before_request(): pass -from . import routes -from . import spacy_nlp_pipeline_models -from . import tesseract_ocr_pipeline_models -from . import transkribus_htr_pipeline_models +from . import ( + routes, + spacy_nlp_pipeline_models, + tesseract_ocr_pipeline_models, + transkribus_htr_pipeline_models +) diff --git a/app/converters/cli.py b/app/converters/cli.py new file mode 100644 index 00000000..a7baf465 --- /dev/null +++ b/app/converters/cli.py @@ -0,0 +1,22 @@ +import click +from . import bp +from .sandpaper import SandpaperConverter + + +@bp.cli.group('converter') +def converter(): + ''' Converter commands. ''' + pass + +@converter.group('sandpaper') +def sandpaper_converter(): + ''' Sandpaper converter commands. ''' + pass + +@sandpaper_converter.command('run') +@click.argument('json_db_file') +@click.argument('data_dir') +def run_sandpaper_converter(json_db_file, data_dir): + ''' Run the sandpaper converter. ''' + sandpaper_converter = SandpaperConverter(json_db_file, data_dir) + sandpaper_converter.run() diff --git a/app/converters/sandpaper.py b/app/converters/sandpaper.py index 2ea61d98..27f2bcc6 100644 --- a/app/converters/sandpaper.py +++ b/app/converters/sandpaper.py @@ -7,101 +7,106 @@ import os import shutil -def convert(json_db_file, data_dir): - with open(json_db_file, 'r') as f: - json_db = json.loads(f.read()) +class SandpaperConverter: + def __init__(self, json_db_file, data_dir): + self.json_db_file = json_db_file + self.data_dir = data_dir - for json_user in json_db: - if not json_user['confirmed']: - current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}') - continue - user_dir = os.path.join(data_dir, str(json_user['id'])) - convert_user(json_user, user_dir) - db.session.commit() + def run(self): + with open(self.json_db_file, 'r') as f: + json_db = json.loads(f.read()) + + for json_user in json_db: + if not json_user['confirmed']: + current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}') + continue + user_dir = os.path.join(self.data_dir, str(json_user['id'])) + self.convert_user(json_user, user_dir) + db.session.commit() -def convert_user(json_user, user_dir): - current_app.logger.info(f'Create User {json_user["username"]}...') - user = User( - confirmed=json_user['confirmed'], - email=json_user['email'], - last_seen=datetime.fromtimestamp(json_user['last_seen']), - member_since=datetime.fromtimestamp(json_user['member_since']), - password_hash=json_user['password_hash'], # TODO: Needs to be added manually - username=json_user['username'] - ) - db.session.add(user) - db.session.flush(objects=[user]) - db.session.refresh(user) - try: - user.makedirs() - except OSError as e: - current_app.logger.error(e) - db.session.rollback() - raise Exception('Internal Server Error') - for json_corpus in json_user['corpora'].values(): - if not json_corpus['files'].values(): - current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}') - continue - corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id'])) - convert_corpus(json_corpus, user, corpus_dir) - current_app.logger.info('Done') - - -def convert_corpus(json_corpus, user, corpus_dir): - current_app.logger.info(f'Create Corpus {json_corpus["title"]}...') - corpus = Corpus( - user=user, - creation_date=datetime.fromtimestamp(json_corpus['creation_date']), - description=json_corpus['description'], - title=json_corpus['title'] - ) - db.session.add(corpus) - db.session.flush(objects=[corpus]) - db.session.refresh(corpus) - try: - corpus.makedirs() - except OSError as e: - current_app.logger.error(e) - db.session.rollback() - raise Exception('Internal Server Error') - for json_corpus_file in json_corpus['files'].values(): - convert_corpus_file(json_corpus_file, corpus, corpus_dir) - current_app.logger.info('Done') - - -def convert_corpus_file(json_corpus_file, corpus, corpus_dir): - current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...') - corpus_file = CorpusFile( - corpus=corpus, - address=json_corpus_file['address'], - author=json_corpus_file['author'], - booktitle=json_corpus_file['booktitle'], - chapter=json_corpus_file['chapter'], - editor=json_corpus_file['editor'], - filename=json_corpus_file['filename'], - institution=json_corpus_file['institution'], - journal=json_corpus_file['journal'], - mimetype='application/vrt+xml', - pages=json_corpus_file['pages'], - publisher=json_corpus_file['publisher'], - publishing_year=json_corpus_file['publishing_year'], - school=json_corpus_file['school'], - title=json_corpus_file['title'] - ) - db.session.add(corpus_file) - db.session.flush(objects=[corpus_file]) - db.session.refresh(corpus_file) - try: - shutil.copy2( - os.path.join(corpus_dir, json_corpus_file['filename']), - corpus_file.path + def convert_user(self, json_user, user_dir): + current_app.logger.info(f'Create User {json_user["username"]}...') + user = User( + confirmed=json_user['confirmed'], + email=json_user['email'], + last_seen=datetime.fromtimestamp(json_user['last_seen']), + member_since=datetime.fromtimestamp(json_user['member_since']), + password_hash=json_user['password_hash'], # TODO: Needs to be added manually + username=json_user['username'] ) - except: - current_app.logger.warning( - 'Can not convert corpus file: ' - f'{os.path.join(corpus_dir, json_corpus_file["filename"])}' - ' -> ' - f'{corpus_file.path}' + db.session.add(user) + db.session.flush(objects=[user]) + db.session.refresh(user) + try: + user.makedirs() + except OSError as e: + current_app.logger.error(e) + db.session.rollback() + raise Exception('Internal Server Error') + for json_corpus in json_user['corpora'].values(): + if not json_corpus['files'].values(): + current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}') + continue + corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id'])) + self.convert_corpus(json_corpus, user, corpus_dir) + current_app.logger.info('Done') + + + def convert_corpus(self, json_corpus, user, corpus_dir): + current_app.logger.info(f'Create Corpus {json_corpus["title"]}...') + corpus = Corpus( + user=user, + creation_date=datetime.fromtimestamp(json_corpus['creation_date']), + description=json_corpus['description'], + title=json_corpus['title'] ) - current_app.logger.info('Done') + db.session.add(corpus) + db.session.flush(objects=[corpus]) + db.session.refresh(corpus) + try: + corpus.makedirs() + except OSError as e: + current_app.logger.error(e) + db.session.rollback() + raise Exception('Internal Server Error') + for json_corpus_file in json_corpus['files'].values(): + self.convert_corpus_file(json_corpus_file, corpus, corpus_dir) + current_app.logger.info('Done') + + + def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir): + current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...') + corpus_file = CorpusFile( + corpus=corpus, + address=json_corpus_file['address'], + author=json_corpus_file['author'], + booktitle=json_corpus_file['booktitle'], + chapter=json_corpus_file['chapter'], + editor=json_corpus_file['editor'], + filename=json_corpus_file['filename'], + institution=json_corpus_file['institution'], + journal=json_corpus_file['journal'], + mimetype='application/vrt+xml', + pages=json_corpus_file['pages'], + publisher=json_corpus_file['publisher'], + publishing_year=json_corpus_file['publishing_year'], + school=json_corpus_file['school'], + title=json_corpus_file['title'] + ) + db.session.add(corpus_file) + db.session.flush(objects=[corpus_file]) + db.session.refresh(corpus_file) + try: + shutil.copy2( + os.path.join(corpus_dir, json_corpus_file['filename']), + corpus_file.path + ) + except: + current_app.logger.warning( + 'Can not convert corpus file: ' + f'{os.path.join(corpus_dir, json_corpus_file["filename"])}' + ' -> ' + f'{corpus_file.path}' + ) + current_app.logger.info('Done') diff --git a/app/corpora/__init__.py b/app/corpora/__init__.py index 3766f2a6..34663b69 100644 --- a/app/corpora/__init__.py +++ b/app/corpora/__init__.py @@ -3,6 +3,7 @@ from flask_login import login_required bp = Blueprint('corpora', __name__) +bp.cli.short_help = 'Corpus commands.' @bp.before_request @@ -15,6 +16,4 @@ def before_request(): pass -from . import cqi_over_socketio, routes, json_routes -from . import files -from . import followers +from . import cli, cqi_over_socketio, files, followers, routes, json_routes diff --git a/app/corpora/cli.py b/app/corpora/cli.py new file mode 100644 index 00000000..d21e8289 --- /dev/null +++ b/app/corpora/cli.py @@ -0,0 +1,21 @@ +from app.models import Corpus, CorpusStatus +import os +import shutil +from . import bp + + +@bp.cli.command('reset') +def reset(): + ''' Reset built corpora. ''' + status = [ + CorpusStatus.QUEUED, + CorpusStatus.BUILDING, + CorpusStatus.BUILT, + CorpusStatus.STARTING_ANALYSIS_SESSION, + CorpusStatus.RUNNING_ANALYSIS_SESSION, + CorpusStatus.CANCELING_ANALYSIS_SESSION + ] + for corpus in [x for x in Corpus.query.all() if x.status in status]: + shutil.rmtree(os.path.join(corpus.path, 'cwb'), ignore_errors=True) + corpus.status = CorpusStatus.SUBMITTED + corpus.num_analysis_sessions = 0 diff --git a/app/errors/__init__.py b/app/errors/__init__.py index 847658fb..0d79af48 100644 --- a/app/errors/__init__.py +++ b/app/errors/__init__.py @@ -1,6 +1,5 @@ -from werkzeug.exceptions import HTTPException -from .handlers import generic +from flask import Blueprint -def init_app(app): - app.register_error_handler(HTTPException, generic) +bp = Blueprint('errors', __name__) +from . import handlers diff --git a/app/errors/handlers.py b/app/errors/handlers.py index fe7aaf4f..a18979ab 100644 --- a/app/errors/handlers.py +++ b/app/errors/handlers.py @@ -1,13 +1,14 @@ -from flask import jsonify, render_template, request, Response +from flask import jsonify, render_template, request from werkzeug.exceptions import HTTPException -from typing import Tuple, Union +from . import bp -def generic(error: HTTPException) -> Tuple[Union[str, Response], int]: - ''' Generic error handler ''' - accent_json: bool = request.accept_mimetypes.accept_json - accept_html: bool = request.accept_mimetypes.accept_html - if accent_json and not accept_html: - response: Response = jsonify(str(error)) +@bp.app_errorhandler(HTTPException) +def handle_http_exception(error): + ''' Generic HTTP exception handler ''' + accept_json = request.accept_mimetypes.accept_json + accept_html = request.accept_mimetypes.accept_html + if accept_json and not accept_html: + response = jsonify(str(error)) return response, error.code return render_template('errors/error.html.j2', error=error), error.code diff --git a/app/main/__init__.py b/app/main/__init__.py index f32fed5f..c9586fca 100644 --- a/app/main/__init__.py +++ b/app/main/__init__.py @@ -2,4 +2,4 @@ from flask import Blueprint bp = Blueprint('main', __name__, cli_group=None) -from . import routes +from . import cli, routes diff --git a/app/main/cli.py b/app/main/cli.py new file mode 100644 index 00000000..0284bb88 --- /dev/null +++ b/app/main/cli.py @@ -0,0 +1,45 @@ +from flask import current_app +from flask_migrate import upgrade +import os +from app.models import ( + CorpusFollowerRole, + Role, + SpaCyNLPPipelineModel, + TesseractOCRPipelineModel, + User +) +from . import bp + + +@bp.cli.command('deploy') +def deploy(): + ''' Run deployment tasks. ''' + # Make default directories + print('Make default directories') + base_dir = current_app.config['NOPAQUE_DATA_DIR'] + default_dirs = [ + os.path.join(base_dir, 'tmp'), + os.path.join(base_dir, 'users') + ] + for dir in default_dirs: + if os.path.exists(dir): + if not os.path.isdir(dir): + raise NotADirectoryError(f'{dir} is not a directory') + else: + os.mkdir(dir) + + # migrate database to latest revision + print('Migrate database to latest revision') + upgrade() + + # Insert/Update default database values + print('Insert/Update default Roles') + Role.insert_defaults() + print('Insert/Update default Users') + User.insert_defaults() + print('Insert/Update default CorpusFollowerRoles') + CorpusFollowerRole.insert_defaults() + print('Insert/Update default SpaCyNLPPipelineModels') + SpaCyNLPPipelineModel.insert_defaults() + print('Insert/Update default TesseractOCRPipelineModels') + TesseractOCRPipelineModel.insert_defaults() diff --git a/app/tests/__init__.py b/app/tests/__init__.py index 4665c05d..e69de29b 100644 --- a/app/tests/__init__.py +++ b/app/tests/__init__.py @@ -1,5 +0,0 @@ -from flask import Blueprint - - -bp = Blueprint('tests', __name__) -from . import cli diff --git a/app/users/__init__.py b/app/users/__init__.py index 8584a56f..b3492a37 100644 --- a/app/users/__init__.py +++ b/app/users/__init__.py @@ -15,5 +15,4 @@ def before_request(): pass -from . import events, json_routes, routes -from . import settings +from . import events, json_routes, routes, settings