Merge branch 'public-corpus' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into public-corpus

This commit is contained in:
Inga Kirschnick 2023-05-15 14:01:49 +02:00
commit 49ff1aa284
16 changed files with 218 additions and 228 deletions

View File

@ -13,6 +13,7 @@ from flask_paranoid import Paranoid
from flask_socketio import SocketIO from flask_socketio import SocketIO
from flask_sqlalchemy import SQLAlchemy from flask_sqlalchemy import SQLAlchemy
from flask_hashids import Hashids from flask_hashids import Hashids
from werkzeug.exceptions import HTTPException
apifairy = APIFairy() apifairy = APIFairy()
@ -35,7 +36,7 @@ socketio = SocketIO()
def create_app(config: Config = Config) -> Flask: def create_app(config: Config = Config) -> Flask:
''' Creates an initialized Flask (WSGI Application) object. ''' ''' Creates an initialized Flask (WSGI Application) object. '''
app: Flask = Flask(__name__) app = Flask(__name__)
app.config.from_object(config) app.config.from_object(config)
config.init_app(app) config.init_app(app)
docker_client.login( docker_client.login(
@ -57,12 +58,6 @@ def create_app(config: Config = Config) -> Flask:
scheduler.init_app(app) scheduler.init_app(app)
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa
from .errors import init_app as init_error_handlers
init_error_handlers(app)
from .cli import init_app as init_cli
init_cli(app)
from .admin import bp as admin_blueprint from .admin import bp as admin_blueprint
default_breadcrumb_root(admin_blueprint, '.admin') default_breadcrumb_root(admin_blueprint, '.admin')
app.register_blueprint(admin_blueprint, url_prefix='/admin') app.register_blueprint(admin_blueprint, url_prefix='/admin')
@ -80,7 +75,10 @@ def create_app(config: Config = Config) -> Flask:
from .corpora import bp as corpora_blueprint from .corpora import bp as corpora_blueprint
default_breadcrumb_root(corpora_blueprint, '.corpora') default_breadcrumb_root(corpora_blueprint, '.corpora')
app.register_blueprint(corpora_blueprint, url_prefix='/corpora') app.register_blueprint(corpora_blueprint, cli_group='corpus', url_prefix='/corpora')
from .errors import bp as errors_bp
app.register_blueprint(errors_bp)
from .jobs import bp as jobs_blueprint from .jobs import bp as jobs_blueprint
default_breadcrumb_root(jobs_blueprint, '.jobs') default_breadcrumb_root(jobs_blueprint, '.jobs')
@ -88,7 +86,7 @@ def create_app(config: Config = Config) -> Flask:
from .main import bp as main_blueprint from .main import bp as main_blueprint
default_breadcrumb_root(main_blueprint, '.') default_breadcrumb_root(main_blueprint, '.')
app.register_blueprint(main_blueprint) app.register_blueprint(main_blueprint, cli_group=None)
from .services import bp as services_blueprint from .services import bp as services_blueprint
default_breadcrumb_root(services_blueprint, '.services') default_breadcrumb_root(services_blueprint, '.services')

View File

@ -1,10 +0,0 @@
from .converter import init_app as converter_init_app
from .corpus import init_app as corpus_init_app
from .main import init_app as main_init_app
def init_app(app):
    ''' Register every CLI command set on the given Flask app. '''
    # Same registration order as before: converter, corpus, main.
    registrars = (converter_init_app, corpus_init_app, main_init_app)
    for register in registrars:
        register(app)

View File

@ -1,21 +0,0 @@
import click
def init_app(app):
    ''' Attach the "converter sandpaper run" command tree to app.cli. '''
    # NOTE: the docstrings of the decorated functions below double as the
    # help text click shows for each group/command.

    @app.cli.group('converter')
    def converter_group():
        ''' Converter commands. '''

    @converter_group.group('sandpaper')
    def sandpaper_group():
        ''' Sandpaper converter commands. '''

    @sandpaper_group.command('run')
    @click.argument('json_db')
    @click.argument('data_dir')
    def run_command(json_db, data_dir):
        ''' Run the sandpaper converter. '''
        # Imported at call time, only when the command actually runs.
        from app.converters import sandpaper
        sandpaper.convert(json_db, data_dir)

View File

@ -1,23 +0,0 @@
from app.models import Corpus, CorpusStatus
def init_app(app):
    ''' Attach the "corpus" command group (with "dismantle") to app.cli. '''
    # NOTE: the docstrings of the decorated functions below double as the
    # help text click shows for each group/command.

    @app.cli.group('corpus')
    def corpus():
        ''' Corpus commands. '''
        pass

    @corpus.command('dismantle')
    def dismantle():
        ''' Dismantle built corpora. '''
        # Local import: the shared SQLAlchemy handle is needed to persist the
        # changes made below.
        from app import db

        # Every state that implies a build exists or is in progress.
        status = [
            CorpusStatus.QUEUED,
            CorpusStatus.BUILDING,
            CorpusStatus.BUILT,
            CorpusStatus.STARTING_ANALYSIS_SESSION,
            CorpusStatus.RUNNING_ANALYSIS_SESSION,
            CorpusStatus.CANCELING_ANALYSIS_SESSION
        ]
        for built_corpus in [x for x in Corpus.query.all() if x.status in status]:
            built_corpus.status = CorpusStatus.SUBMITTED
            built_corpus.num_analysis_sessions = 0
        # Without an explicit commit the modified rows are discarded when the
        # session is removed at the end of the CLI invocation.
        db.session.commit()

View File

@ -1,45 +0,0 @@
from flask import current_app
from flask_migrate import upgrade
import os
from app.models import (
CorpusFollowerRole,
Role,
SpaCyNLPPipelineModel,
TesseractOCRPipelineModel,
User
)
def init_app(app):
    ''' Attach the top-level "deploy" command to app.cli. '''

    @app.cli.command('deploy')
    def deploy():
        ''' Run deployment tasks. '''
        # Make default directories
        print('Make default directories')
        data_root = current_app.config['NOPAQUE_DATA_DIR']
        for subdir_name in ('tmp', 'users'):
            directory = os.path.join(data_root, subdir_name)
            if not os.path.exists(directory):
                os.mkdir(directory)
            elif not os.path.isdir(directory):
                # Something that is not a directory occupies the path.
                raise NotADirectoryError(f'{directory} is not a directory')

        # migrate database to latest revision
        print('Migrate database to latest revision')
        upgrade()

        # Insert/Update default database values, in this fixed order
        seed_steps = (
            ('Insert/Update default Roles', Role),
            ('Insert/Update default Users', User),
            ('Insert/Update default CorpusFollowerRoles', CorpusFollowerRole),
            ('Insert/Update default SpaCyNLPPipelineModels', SpaCyNLPPipelineModel),
            ('Insert/Update default TesseractOCRPipelineModels', TesseractOCRPipelineModel),
        )
        for message, model in seed_steps:
            print(message)
            model.insert_defaults()

View File

@ -15,7 +15,9 @@ def before_request():
pass pass
from . import routes from . import (
from . import spacy_nlp_pipeline_models routes,
from . import tesseract_ocr_pipeline_models spacy_nlp_pipeline_models,
from . import transkribus_htr_pipeline_models tesseract_ocr_pipeline_models,
transkribus_htr_pipeline_models
)

22
app/converters/cli.py Normal file
View File

@ -0,0 +1,22 @@
import click
from . import bp
from .sandpaper import SandpaperConverter
@bp.cli.group('converter')
def converter():
    ''' Converter commands. '''
    # Container group only; its docstring is the help text click displays.


@converter.group('sandpaper')
def sandpaper_converter():
    ''' Sandpaper converter commands. '''
    # Container group only; the "run" command is attached below.


@sandpaper_converter.command('run')
@click.argument('json_db_file')
@click.argument('data_dir')
def run_sandpaper_converter(json_db_file, data_dir):
    ''' Run the sandpaper converter. '''
    # Construct the converter from the two CLI arguments and run it at once.
    SandpaperConverter(json_db_file, data_dir).run()

View File

@ -7,20 +7,25 @@ import os
import shutil import shutil
def convert(json_db_file, data_dir): class SandpaperConverter:
with open(json_db_file, 'r') as f: def __init__(self, json_db_file, data_dir):
self.json_db_file = json_db_file
self.data_dir = data_dir
def run(self):
with open(self.json_db_file, 'r') as f:
json_db = json.loads(f.read()) json_db = json.loads(f.read())
for json_user in json_db: for json_user in json_db:
if not json_user['confirmed']: if not json_user['confirmed']:
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}') current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
continue continue
user_dir = os.path.join(data_dir, str(json_user['id'])) user_dir = os.path.join(self.data_dir, str(json_user['id']))
convert_user(json_user, user_dir) self.convert_user(json_user, user_dir)
db.session.commit() db.session.commit()
def convert_user(json_user, user_dir): def convert_user(self, json_user, user_dir):
current_app.logger.info(f'Create User {json_user["username"]}...') current_app.logger.info(f'Create User {json_user["username"]}...')
user = User( user = User(
confirmed=json_user['confirmed'], confirmed=json_user['confirmed'],
@ -44,11 +49,11 @@ def convert_user(json_user, user_dir):
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}') current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
continue continue
corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id'])) corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
convert_corpus(json_corpus, user, corpus_dir) self.convert_corpus(json_corpus, user, corpus_dir)
current_app.logger.info('Done') current_app.logger.info('Done')
def convert_corpus(json_corpus, user, corpus_dir): def convert_corpus(self, json_corpus, user, corpus_dir):
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...') current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
corpus = Corpus( corpus = Corpus(
user=user, user=user,
@ -66,11 +71,11 @@ def convert_corpus(json_corpus, user, corpus_dir):
db.session.rollback() db.session.rollback()
raise Exception('Internal Server Error') raise Exception('Internal Server Error')
for json_corpus_file in json_corpus['files'].values(): for json_corpus_file in json_corpus['files'].values():
convert_corpus_file(json_corpus_file, corpus, corpus_dir) self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
current_app.logger.info('Done') current_app.logger.info('Done')
def convert_corpus_file(json_corpus_file, corpus, corpus_dir): def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...') current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
corpus_file = CorpusFile( corpus_file = CorpusFile(
corpus=corpus, corpus=corpus,

View File

@ -3,6 +3,7 @@ from flask_login import login_required
bp = Blueprint('corpora', __name__) bp = Blueprint('corpora', __name__)
bp.cli.short_help = 'Corpus commands.'
@bp.before_request @bp.before_request
@ -15,6 +16,4 @@ def before_request():
pass pass
from . import cqi_over_socketio, routes, json_routes from . import cli, cqi_over_socketio, files, followers, routes, json_routes
from . import files
from . import followers

24
app/corpora/cli.py Normal file
View File

@ -0,0 +1,24 @@
from app.models import Corpus, CorpusStatus
import os
import shutil
from app import db
from . import bp
@bp.cli.command('reset')
def reset():
    ''' Reset built corpora. '''
    # States in which a corpus has (or is producing) build artifacts.
    resettable_states = [
        CorpusStatus.QUEUED,
        CorpusStatus.BUILDING,
        CorpusStatus.BUILT,
        CorpusStatus.STARTING_ANALYSIS_SESSION,
        CorpusStatus.RUNNING_ANALYSIS_SESSION,
        CorpusStatus.CANCELING_ANALYSIS_SESSION
    ]
    for corpus in Corpus.query.all():
        if corpus.status not in resettable_states:
            continue
        print(f'Resetting corpus {corpus}')
        # Remove the corpus' "cwb" subdirectory; a missing directory is fine.
        cwb_dir = os.path.join(corpus.path, 'cwb')
        shutil.rmtree(cwb_dir, ignore_errors=True)
        corpus.status = CorpusStatus.UNPREPARED
        corpus.num_analysis_sessions = 0
    # Persist all resets together.
    db.session.commit()

View File

@ -1,6 +1,5 @@
from werkzeug.exceptions import HTTPException from flask import Blueprint
from .handlers import generic
def init_app(app): bp = Blueprint('errors', __name__)
app.register_error_handler(HTTPException, generic) from . import handlers

View File

@ -1,13 +1,14 @@
from flask import jsonify, render_template, request, Response from flask import jsonify, render_template, request
from werkzeug.exceptions import HTTPException from werkzeug.exceptions import HTTPException
from typing import Tuple, Union from . import bp
def generic(error: HTTPException) -> Tuple[Union[str, Response], int]: @bp.app_errorhandler(HTTPException)
''' Generic error handler ''' def handle_http_exception(error):
accent_json: bool = request.accept_mimetypes.accept_json ''' Generic HTTP exception handler '''
accept_html: bool = request.accept_mimetypes.accept_html accept_json = request.accept_mimetypes.accept_json
if accent_json and not accept_html: accept_html = request.accept_mimetypes.accept_html
response: Response = jsonify(str(error)) if accept_json and not accept_html:
response = jsonify(str(error))
return response, error.code return response, error.code
return render_template('errors/error.html.j2', error=error), error.code return render_template('errors/error.html.j2', error=error), error.code

View File

@ -2,4 +2,4 @@ from flask import Blueprint
bp = Blueprint('main', __name__, cli_group=None) bp = Blueprint('main', __name__, cli_group=None)
from . import routes from . import cli, routes

45
app/main/cli.py Normal file
View File

@ -0,0 +1,45 @@
from flask import current_app
from flask_migrate import upgrade
import os
from app.models import (
CorpusFollowerRole,
Role,
SpaCyNLPPipelineModel,
TesseractOCRPipelineModel,
User
)
from . import bp
@bp.cli.command('deploy')
def deploy():
    ''' Run deployment tasks. '''
    # Make default directories
    print('Make default directories')
    data_root = current_app.config['NOPAQUE_DATA_DIR']
    for subdir_name in ('tmp', 'users'):
        directory = os.path.join(data_root, subdir_name)
        if not os.path.exists(directory):
            os.mkdir(directory)
        elif not os.path.isdir(directory):
            # Something that is not a directory occupies the path.
            raise NotADirectoryError(f'{directory} is not a directory')

    # migrate database to latest revision
    print('Migrate database to latest revision')
    upgrade()

    # Insert/Update default database values, in this fixed order
    seed_steps = (
        ('Insert/Update default Roles', Role),
        ('Insert/Update default Users', User),
        ('Insert/Update default CorpusFollowerRoles', CorpusFollowerRole),
        ('Insert/Update default SpaCyNLPPipelineModels', SpaCyNLPPipelineModel),
        ('Insert/Update default TesseractOCRPipelineModels', TesseractOCRPipelineModel),
    )
    for message, model in seed_steps:
        print(message)
        model.insert_defaults()

View File

@ -1,5 +0,0 @@
from flask import Blueprint
bp = Blueprint('tests', __name__)
from . import cli

View File

@ -15,5 +15,4 @@ def before_request():
pass pass
from . import events, json_routes, routes from . import events, json_routes, routes, settings
from . import settings