Restructure code and use APScheduler for daemon functionality

This commit is contained in:
Patrick Jentsch 2022-06-28 12:30:02 +02:00
parent 7c52d3f392
commit b8bf004684
20 changed files with 755 additions and 710 deletions

View File

@ -1,4 +1,5 @@
from config import Config from config import Config
from docker import DockerClient
from flask import Flask from flask import Flask
from flask_apscheduler import APScheduler from flask_apscheduler import APScheduler
from flask_assets import Environment from flask_assets import Environment
@ -11,18 +12,19 @@ from flask_sqlalchemy import SQLAlchemy
from flask_hashids import Hashids from flask_hashids import Hashids
assets: Environment = Environment() assets = Environment()
db: SQLAlchemy = SQLAlchemy() db = SQLAlchemy()
hashids: Hashids = Hashids() docker_client = DockerClient()
login: LoginManager = LoginManager() hashids = Hashids()
login = LoginManager()
login.login_view = 'auth.login' login.login_view = 'auth.login'
login.login_message = 'Please log in to access this page.' login.login_message = 'Please log in to access this page.'
mail: Mail = Mail() mail = Mail()
migrate: Migrate = Migrate() migrate = Migrate()
paranoid: Paranoid = Paranoid() paranoid = Paranoid()
paranoid.redirect_view = '/' paranoid.redirect_view = '/'
scheduler: APScheduler = APScheduler() # TODO: Use this! scheduler = APScheduler()
socketio: SocketIO = SocketIO() socketio = SocketIO()
def create_app(config: Config = Config) -> Flask: def create_app(config: Config = Config) -> Flask:
@ -30,6 +32,11 @@ def create_app(config: Config = Config) -> Flask:
app: Flask = Flask(__name__) app: Flask = Flask(__name__)
app.config.from_object(config) app.config.from_object(config)
config.init_app(app) config.init_app(app)
docker_client.login(
username=app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
password=app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
registry=app.config['NOPAQUE_DOCKER_REGISTRY']
)
assets.init_app(app) assets.init_app(app)
db.init_app(app) db.init_app(app)
@ -38,10 +45,11 @@ def create_app(config: Config = Config) -> Flask:
mail.init_app(app) mail.init_app(app)
migrate.init_app(app, db) migrate.init_app(app, db)
paranoid.init_app(app) paranoid.init_app(app)
scheduler.init_app(app)
socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI']) # noqa
from app import socketio_event_listeners from . import tasks
from app import sqlalchemy_event_listeners tasks.register(app, scheduler)
from .admin import bp as admin_blueprint from .admin import bp as admin_blueprint
app.register_blueprint(admin_blueprint, url_prefix='/admin') app.register_blueprint(admin_blueprint, url_prefix='/admin')

View File

@ -1,7 +1,6 @@
from app.models import Role, User, TesseractOCRModel, TranskribusHTRModel
from flask import current_app from flask import current_app
from flask_migrate import upgrade from flask_migrate import upgrade
from . import db
from .models import Corpus, Role, User, TesseractOCRModel, TranskribusHTRModel
import click import click
import os import os
@ -41,22 +40,6 @@ def register(app):
current_app.logger.info('Insert/Update default TranskribusHTRModels') current_app.logger.info('Insert/Update default TranskribusHTRModels')
TranskribusHTRModel.insert_defaults() TranskribusHTRModel.insert_defaults()
@app.cli.group()
def daemon():
''' Daemon commands. '''
pass
@daemon.command('run')
def run_daemon():
''' Run daemon '''
corpus: Corpus
for corpus in Corpus.query.filter(Corpus.num_analysis_sessions > 0):
corpus.num_analysis_sessions = 0
db.session.commit()
from app.daemon import Daemon
daemon: Daemon = Daemon()
daemon.run()
@app.cli.group() @app.cli.group()
def converter(): def converter():
''' Converter commands. ''' ''' Converter commands. '''

View File

@ -1,23 +1,9 @@
from app import db from app import db
from flask import current_app from .corpus_utils import check_corpora
from time import sleep from .job_utils import check_jobs
from .corpus_utils import CheckCorporaMixin
from .job_utils import CheckJobsMixin
import docker
class Daemon(CheckCorporaMixin, CheckJobsMixin): def daemon():
def __init__(self): check_corpora()
self.docker = docker.from_env() check_jobs()
self.docker.login(
username=current_app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
password=current_app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
registry=current_app.config['NOPAQUE_DOCKER_REGISTRY']
)
def run(self):
while True:
self.check_corpora()
self.check_jobs()
db.session.commit() db.session.commit()
sleep(1.5)

View File

@ -1,3 +1,4 @@
from app import docker_client
from app.models import Corpus, CorpusStatus from app.models import Corpus, CorpusStatus
from flask import current_app from flask import current_app
import docker import docker
@ -5,25 +6,24 @@ import os
import shutil import shutil
class CheckCorporaMixin: def check_corpora():
def check_corpora(self):
corpora = Corpus.query.all() corpora = Corpus.query.all()
for corpus in (x for x in corpora if x.status == CorpusStatus.SUBMITTED): # noqa for corpus in [x for x in corpora if x.status == CorpusStatus.SUBMITTED]:
self.create_build_corpus_service(corpus) _create_build_corpus_service(corpus)
for corpus in (x for x in corpora if x.status == CorpusStatus.QUEUED or x.status == CorpusStatus.BUILDING): # noqa for corpus in [x for x in corpora if x.status in [CorpusStatus.QUEUED, CorpusStatus.BUILDING]]:
self.checkout_build_corpus_service(corpus) _checkout_build_corpus_service(corpus)
for corpus in (x for x in corpora if x.status == CorpusStatus.BUILT and x.num_analysis_sessions > 0): # noqa for corpus in [x for x in corpora if x.status == CorpusStatus.BUILT and x.num_analysis_sessions > 0]:
corpus.status = CorpusStatus.STARTING_ANALYSIS_SESSION corpus.status = CorpusStatus.STARTING_ANALYSIS_SESSION
for corpus in (x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION and x.num_analysis_sessions == 0): # noqa for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION and x.num_analysis_sessions == 0]:
corpus.status = CorpusStatus.CANCELING_ANALYSIS_SESSION corpus.status = CorpusStatus.CANCELING_ANALYSIS_SESSION
for corpus in (x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION): # noqa for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION]:
self.checkout_analysing_corpus_container(corpus) _checkout_analysing_corpus_container(corpus)
for corpus in (x for x in corpora if x.status == CorpusStatus.STARTING_ANALYSIS_SESSION): # noqa for corpus in [x for x in corpora if x.status == CorpusStatus.STARTING_ANALYSIS_SESSION]:
self.create_cqpserver_container(corpus) _create_cqpserver_container(corpus)
for corpus in (x for x in corpora if x.status == CorpusStatus.CANCELING_ANALYSIS_SESSION): # noqa for corpus in [x for x in corpora if x.status == CorpusStatus.CANCELING_ANALYSIS_SESSION]:
self.remove_cqpserver_container(corpus) _remove_cqpserver_container(corpus)
def create_build_corpus_service(self, corpus): def _create_build_corpus_service(corpus):
''' # Docker service settings # ''' ''' # Docker service settings # '''
''' ## Command ## ''' ''' ## Command ## '''
command = ['bash', '-c'] command = ['bash', '-c']
@ -37,7 +37,7 @@ class CheckCorporaMixin:
f' -R /usr/local/share/cwb/registry/nopaque_{corpus.id}' f' -R /usr/local/share/cwb/registry/nopaque_{corpus.id}'
' -P pos -P lemma -P simple_pos' ' -P pos -P lemma -P simple_pos'
' -S ent:0+type -S s:0' ' -S ent:0+type -S s:0'
' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title' # noqa ' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title'
' -xsB -9' ' -xsB -9'
' && ' ' && '
f'cwb-make -V NOPAQUE_{corpus.id}' f'cwb-make -V NOPAQUE_{corpus.id}'
@ -80,7 +80,7 @@ class CheckCorporaMixin:
''' ## Restart policy ## ''' ''' ## Restart policy ## '''
restart_policy = docker.types.RestartPolicy() restart_policy = docker.types.RestartPolicy()
try: try:
self.docker.services.create( docker_client.services.create(
image, image,
command=command, command=command,
constraints=constraints, constraints=constraints,
@ -98,10 +98,10 @@ class CheckCorporaMixin:
return return
corpus.status = CorpusStatus.QUEUED corpus.status = CorpusStatus.QUEUED
def checkout_build_corpus_service(self, corpus): def _checkout_build_corpus_service(corpus):
service_name = f'build-corpus_{corpus.id}' service_name = f'build-corpus_{corpus.id}'
try: try:
service = self.docker.services.get(service_name) service = docker_client.services.get(service_name)
except docker.errors.NotFound as e: except docker.errors.NotFound as e:
current_app.logger.error( current_app.logger.error(
f'Get service "{service_name}" failed ' f'Get service "{service_name}" failed '
@ -118,12 +118,12 @@ class CheckCorporaMixin:
if not service_tasks: if not service_tasks:
return return
task_state = service_tasks[0].get('Status').get('State') task_state = service_tasks[0].get('Status').get('State')
if corpus.status == CorpusStatus.QUEUED and task_state != 'pending': # noqa if corpus.status == CorpusStatus.QUEUED and task_state != 'pending':
corpus.status = CorpusStatus.BUILDING corpus.status = CorpusStatus.BUILDING
return return
elif corpus.status == CorpusStatus.BUILDING and task_state == 'complete': # noqa elif corpus.status == CorpusStatus.BUILDING and task_state == 'complete':
corpus.status = CorpusStatus.BUILT corpus.status = CorpusStatus.BUILT
elif corpus.status == CorpusStatus.BUILDING and task_state == 'failed': # noqa elif corpus.status == CorpusStatus.BUILDING and task_state == 'failed':
corpus.status = CorpusStatus.FAILED corpus.status = CorpusStatus.FAILED
else: else:
return return
@ -135,7 +135,7 @@ class CheckCorporaMixin:
f'due to "docker.errors.APIError": {e}' f'due to "docker.errors.APIError": {e}'
) )
def create_cqpserver_container(self, corpus): def _create_cqpserver_container(corpus):
''' # Docker container settings # ''' ''' # Docker container settings # '''
''' ## Command ## ''' ''' ## Command ## '''
command = [] command = []
@ -166,12 +166,12 @@ class CheckCorporaMixin:
''' ### Corpus registry volume ### ''' ''' ### Corpus registry volume ### '''
registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry') registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry')
registry_volume_target = '/usr/local/share/cwb/registry' registry_volume_target = '/usr/local/share/cwb/registry'
registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw' # noqa registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw'
volumes.append(registry_volume) volumes.append(registry_volume)
# Check if a cqpserver container already exists. If this is the case, # Check if a cqpserver container already exists. If this is the case,
# remove it and create a new one # remove it and create a new one
try: try:
container = self.docker.containers.get(name) container = docker_client.containers.get(name)
except docker.errors.NotFound: except docker.errors.NotFound:
pass pass
except docker.errors.APIError as e: except docker.errors.APIError as e:
@ -190,7 +190,7 @@ class CheckCorporaMixin:
) )
return return
try: try:
self.docker.containers.run( docker_client.containers.run(
image, image,
command=command, command=command,
detach=detach, detach=detach,
@ -215,10 +215,10 @@ class CheckCorporaMixin:
return return
corpus.status = CorpusStatus.RUNNING_ANALYSIS_SESSION corpus.status = CorpusStatus.RUNNING_ANALYSIS_SESSION
def checkout_analysing_corpus_container(self, corpus): def _checkout_analysing_corpus_container(corpus):
container_name = f'cqpserver_{corpus.id}' container_name = f'cqpserver_{corpus.id}'
try: try:
self.docker.containers.get(container_name) docker_client.containers.get(container_name)
except docker.errors.NotFound as e: except docker.errors.NotFound as e:
current_app.logger.error( current_app.logger.error(
f'Get container "{container_name}" failed ' f'Get container "{container_name}" failed '
@ -232,10 +232,10 @@ class CheckCorporaMixin:
f'due to "docker.errors.APIError": {e}' f'due to "docker.errors.APIError": {e}'
) )
def remove_cqpserver_container(self, corpus): def _remove_cqpserver_container(corpus):
container_name = f'cqpserver_{corpus.id}' container_name = f'cqpserver_{corpus.id}'
try: try:
container = self.docker.containers.get(container_name) container = docker_client.containers.get(container_name)
except docker.errors.NotFound: except docker.errors.NotFound:
corpus.status = CorpusStatus.BUILT corpus.status = CorpusStatus.BUILT
return return

View File

@ -1,4 +1,4 @@
from app import db from app import db, docker_client
from app.models import ( from app.models import (
Job, Job,
JobResult, JobResult,
@ -15,39 +15,38 @@ import os
import shutil import shutil
class CheckJobsMixin: def check_jobs():
def check_jobs(self):
jobs = Job.query.all() jobs = Job.query.all()
for job in (x for x in jobs if x.status == JobStatus.SUBMITTED): for job in [x for x in jobs if x.status == JobStatus.SUBMITTED]:
self.create_job_service(job) _create_job_service(job)
for job in (x for x in jobs if x.status in [JobStatus.QUEUED, JobStatus.RUNNING]): # noqa for job in [x for x in jobs if x.status in [JobStatus.QUEUED, JobStatus.RUNNING]]:
self.checkout_job_service(job) _checkout_job_service(job)
for job in (x for x in jobs if x.status == JobStatus.CANCELING): for job in [x for x in jobs if x.status == JobStatus.CANCELING]:
self.remove_job_service(job) _remove_job_service(job)
def create_job_service(self, job): def _create_job_service(job):
''' # Docker service settings # ''' ''' # Docker service settings # '''
''' ## Service specific settings ## ''' ''' ## Service specific settings ## '''
if job.service == 'file-setup-pipeline': if job.service == 'file-setup-pipeline':
mem_mb = 512 mem_mb = 512
n_cores = 2 n_cores = 2
executable = 'file-setup-pipeline' executable = 'file-setup-pipeline'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}file-setup-pipeline:v{job.service_version}' # noqa image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}file-setup-pipeline:v{job.service_version}'
elif job.service == 'tesseract-ocr-pipeline': elif job.service == 'tesseract-ocr-pipeline':
mem_mb = 1024 mem_mb = 1024
n_cores = 4 n_cores = 4
executable = 'tesseract-ocr-pipeline' executable = 'tesseract-ocr-pipeline'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}tesseract-ocr-pipeline:v{job.service_version}' # noqa image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}tesseract-ocr-pipeline:v{job.service_version}'
elif job.service == 'transkribus-htr-pipeline': elif job.service == 'transkribus-htr-pipeline':
mem_mb = 1024 mem_mb = 1024
n_cores = 4 n_cores = 4
executable = 'transkribus-htr-pipeline' executable = 'transkribus-htr-pipeline'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}transkribus-htr-pipeline:v{job.service_version}' # noqa image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}transkribus-htr-pipeline:v{job.service_version}'
elif job.service == 'spacy-nlp-pipeline': elif job.service == 'spacy-nlp-pipeline':
mem_mb = 1024 mem_mb = 1024
n_cores = 1 n_cores = 1
executable = 'spacy-nlp-pipeline' executable = 'spacy-nlp-pipeline'
image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}spacy-nlp-pipeline:v{job.service_version}' # noqa image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}spacy-nlp-pipeline:v{job.service_version}'
''' ## Command ## ''' ''' ## Command ## '''
command = f'{executable} -i /input -o /output' command = f'{executable} -i /input -o /output'
command += ' --log-dir /logs' command += ' --log-dir /logs'
@ -55,7 +54,7 @@ class CheckJobsMixin:
command += f' --n-cores {n_cores}' command += f' --n-cores {n_cores}'
if job.service == 'spacy-nlp-pipeline': if job.service == 'spacy-nlp-pipeline':
command += f' -m {job.service_args["model"]}' command += f' -m {job.service_args["model"]}'
if 'encoding_detection' in job.service_args and job.service_args['encoding_detection']: # noqa if 'encoding_detection' in job.service_args and job.service_args['encoding_detection']:
command += ' --check-encoding' command += ' --check-encoding'
elif job.service == 'tesseract-ocr-pipeline': elif job.service == 'tesseract-ocr-pipeline':
command += f' -m {job.service_args["model"]}' command += f' -m {job.service_args["model"]}'
@ -86,7 +85,7 @@ class CheckJobsMixin:
input_mount_target_base += f'/{secure_filename(job.title)}' input_mount_target_base += f'/{secure_filename(job.title)}'
for job_input in job.inputs: for job_input in job.inputs:
input_mount_source = job_input.path input_mount_source = job_input.path
input_mount_target = f'{input_mount_target_base}/{job_input.filename}' # noqa input_mount_target = f'{input_mount_target_base}/{job_input.filename}'
input_mount = f'{input_mount_source}:{input_mount_target}:ro' input_mount = f'{input_mount_source}:{input_mount_target}:ro'
mounts.append(input_mount) mounts.append(input_mount)
if job.service == 'tesseract-ocr-pipeline': if job.service == 'tesseract-ocr-pipeline':
@ -109,7 +108,7 @@ class CheckJobsMixin:
''' ### Pipeline data mount ### ''' ''' ### Pipeline data mount ### '''
pyflow_data_mount_source = os.path.join(job.path, 'pipeline_data') pyflow_data_mount_source = os.path.join(job.path, 'pipeline_data')
pyflow_data_mount_target = '/logs/pyflow.data' pyflow_data_mount_target = '/logs/pyflow.data'
pyflow_data_mount = f'{pyflow_data_mount_source}:{pyflow_data_mount_target}:rw' # noqa pyflow_data_mount = f'{pyflow_data_mount_source}:{pyflow_data_mount_target}:rw'
# Make sure that their is no data in the output directory # Make sure that their is no data in the output directory
shutil.rmtree(pyflow_data_mount_source, ignore_errors=True) shutil.rmtree(pyflow_data_mount_source, ignore_errors=True)
os.makedirs(pyflow_data_mount_source) os.makedirs(pyflow_data_mount_source)
@ -124,7 +123,7 @@ class CheckJobsMixin:
''' ## Restart policy ## ''' ''' ## Restart policy ## '''
restart_policy = docker.types.RestartPolicy() restart_policy = docker.types.RestartPolicy()
try: try:
self.docker.services.create( docker_client.services.create(
image, image,
command=command, command=command,
constraints=constraints, constraints=constraints,
@ -143,10 +142,10 @@ class CheckJobsMixin:
return return
job.status = JobStatus.QUEUED job.status = JobStatus.QUEUED
def checkout_job_service(self, job): def _checkout_job_service(job):
service_name = f'job_{job.id}' service_name = f'job_{job.id}'
try: try:
service = self.docker.services.get(service_name) service = docker_client.services.get(service_name)
except docker.errors.NotFound as e: except docker.errors.NotFound as e:
current_app.logger.error( current_app.logger.error(
f'Get service "{service_name}" failed ' f'Get service "{service_name}" failed '
@ -167,7 +166,7 @@ class CheckJobsMixin:
if job.status == JobStatus.QUEUED and task_state != 'pending': if job.status == JobStatus.QUEUED and task_state != 'pending':
job.status = JobStatus.RUNNING job.status = JobStatus.RUNNING
return return
elif job.status == JobStatus.RUNNING and task_state == 'complete': # noqa elif job.status == JobStatus.RUNNING and task_state == 'complete':
job.status = JobStatus.COMPLETED job.status = JobStatus.COMPLETED
results_dir = os.path.join(job.path, 'results') results_dir = os.path.join(job.path, 'results')
with open(os.path.join(results_dir, 'outputs.json')) as f: with open(os.path.join(results_dir, 'outputs.json')) as f:
@ -201,10 +200,10 @@ class CheckJobsMixin:
f'due to "docker.errors.APIError": {e}' f'due to "docker.errors.APIError": {e}'
) )
def remove_job_service(self, job): def _remove_job_service(job):
service_name = f'job_{job.id}' service_name = f'job_{job.id}'
try: try:
service = self.docker.services.get(service_name) service = docker_client.services.get(service_name)
except docker.errors.NotFound: except docker.errors.NotFound:
job.status = JobStatus.CANCELED job.status = JobStatus.CANCELED
return return

View File

@ -1,8 +1,8 @@
from app.models import Permission
from flask import abort, current_app from flask import abort, current_app
from flask_login import current_user from flask_login import current_user
from functools import wraps from functools import wraps
from threading import Thread from threading import Thread
from .models import Permission
def permission_required(permission): def permission_required(permission):

View File

@ -1,27 +1,27 @@
from flask import current_app, render_template from app import mail
from flask import current_app, Flask, render_template
from flask_mail import Message from flask_mail import Message
from typing import Any, Text from threading import Thread
from . import mail from typing import Any
from .decorators import background
def create_message( def create_message(recipient: str, subject: str, template: str, **kwargs: Any) -> Message:
recipient: str,
subject: str,
template: str,
**kwargs: Any
) -> Message:
subject_prefix: str = current_app.config['NOPAQUE_MAIL_SUBJECT_PREFIX'] subject_prefix: str = current_app.config['NOPAQUE_MAIL_SUBJECT_PREFIX']
msg: Message = Message( msg: Message = Message(
f'{subject_prefix} {subject}', body=render_template(f'{template}.txt.j2', **kwargs),
recipients=[recipient] html=render_template(f'{template}.html.j2', **kwargs),
recipients=[recipient],
subject=f'{subject_prefix} {subject}'
) )
msg.body = render_template(f'{template}.txt.j2', **kwargs)
msg.html = render_template(f'{template}.html.j2', **kwargs)
return msg return msg
@background def _send(app: Flask, msg):
def send(msg: Message, *args, **kwargs): with app.app_context():
with kwargs['app'].app_context():
mail.send(msg) mail.send(msg)
def send(msg: Message, *args, **kwargs):
    '''
    Deliver msg from a background thread.

    The real application object behind the current_app proxy is resolved
    here and handed to _send together with msg. The started Thread is
    returned to the caller. Extra positional and keyword arguments are
    accepted but not used.
    '''
    app = current_app._get_current_object()
    worker = Thread(target=_send, args=[app, msg])
    worker.start()
    return worker

View File

@ -58,6 +58,23 @@ def download_job_input(job_id, job_input_id):
) )
@bp.route('/<hashid:job_id>/log')
@login_required
@admin_required
def job_log(job_id):
    '''
    Serve the pyflow log file of a job.

    The job is looked up by job_id (404 if it does not exist). If its
    status is neither COMPLETED nor FAILED an error message is flashed.
    The file logs/pyflow_log.txt below the job's pipeline_data directory is
    then sent with the attachment name job_<hashid>_log.txt.
    '''
    job = Job.query.get_or_404(job_id)
    if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
        # The message used to read "Can't restart job", copied from the
        # restart route; this route delivers the log instead.
        # NOTE(review): execution still falls through to send_from_directory
        # after flashing the error. An early return (e.g. a redirect to the
        # job page) is probably intended - confirm the target and add it.
        flash(
            f'Can\'t download the log of job "{job.title}": Status is not "Completed/Failed"',  # noqa
            category='error'
        )
    return send_from_directory(
        attachment_filename=f'job_{job.hashid}_log.txt',
        directory=os.path.join(job.path, 'pipeline_data'),
        filename=os.path.join('logs', 'pyflow_log.txt')
    )
@bp.route('/<hashid:job_id>/restart') @bp.route('/<hashid:job_id>/restart')
@login_required @login_required
@admin_required @admin_required

View File

@ -2,4 +2,4 @@ from flask import Blueprint
bp = Blueprint('main', __name__) bp = Blueprint('main', __name__)
from . import routes from . import events, routes

View File

@ -1,8 +1,8 @@
from app import hashids, socketio from app import hashids, socketio
from app.decorators import socketio_login_required from app.models import User
from app.models import TesseractOCRModel, TranskribusHTRModel, User
from flask_login import current_user from flask_login import current_user
from flask_socketio import join_room from flask_socketio import join_room
from app.decorators import socketio_login_required
@socketio.on('users.user.get') @socketio.on('users.user.get')

View File

@ -1,8 +1,8 @@
from app import db, login from app import db, login, mail, socketio
from app.converters.vrt import normalize_vrt_file from app.converters.vrt import normalize_vrt_file
from app.sqlalchemy_type_decorators import ContainerColumn, IntEnumColumn from app.email import create_message
from datetime import datetime, timedelta from datetime import datetime, timedelta
from enum import IntEnum from enum import Enum, IntEnum
from flask import current_app, url_for from flask import current_app, url_for
from flask_hashids import HashidMixin from flask_hashids import HashidMixin
from flask_login import UserMixin from flask_login import UserMixin
@ -20,9 +20,57 @@ import yaml
TRANSKRIBUS_HTR_MODELS = \ TRANSKRIBUS_HTR_MODELS = \
json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text').content)['trpModelMetadata'] # noqa json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text', params={'docType': 'handwritten'}).content)['trpModelMetadata'] # noqa
##############################################################################
# enums #
##############################################################################
# region enums
class CorpusStatus(IntEnum):
    ''' Integer status codes a Corpus can be in. '''
    # Build related states
    UNPREPARED = 1
    SUBMITTED = 2
    QUEUED = 3
    BUILDING = 4
    BUILT = 5
    FAILED = 6
    # Analysis session related states
    STARTING_ANALYSIS_SESSION = 7
    RUNNING_ANALYSIS_SESSION = 8
    CANCELING_ANALYSIS_SESSION = 9
class JobStatus(IntEnum):
    ''' Integer status codes a Job can be in. '''
    INITIALIZING = 1
    SUBMITTED = 2
    QUEUED = 3
    RUNNING = 4
    # Cancellation states
    CANCELING = 5
    CANCELED = 6
    # Final outcomes
    COMPLETED = 7
    FAILED = 8
class Permission(IntEnum):
    '''
    User permissions encoded as distinct powers of two, so that a set of
    permissions fits into one integer and a single permission can be
    tested with the bitwise operator &.
    '''
    ADMINISTRATE = 1 << 0
    CONTRIBUTE = 1 << 1
    USE_API = 1 << 2
class UserSettingJobStatusMailNotificationLevel(IntEnum):
    ''' Levels of the per user job status mail notification setting. '''
    NONE = 1
    END = 2
    ALL = 3
# endregion enums
##############################################################################
# mixins #
##############################################################################
# region mixins
class FileMixin: class FileMixin:
''' '''
Mixin for db.Model classes. All file related models should use this. Mixin for db.Model classes. All file related models should use this.
@ -39,18 +87,59 @@ class FileMixin:
'last_edited_date': self.last_edited_date.isoformat() + 'Z', 'last_edited_date': self.last_edited_date.isoformat() + 'Z',
'mimetype': self.mimetype 'mimetype': self.mimetype
} }
# endregion mixins
class Permission(IntEnum): ##############################################################################
''' # type_decorators #
Defines User permissions as integers by the power of 2. User permission ##############################################################################
can be evaluated using the bitwise operator &. # region type_decorators
''' class IntEnumColumn(db.TypeDecorator):
ADMINISTRATE = 1 impl = db.Integer
CONTRIBUTE = 2
USE_API = 4 def __init__(self, enum_type, *args, **kwargs):
super().__init__(*args, **kwargs)
self.enum_type = enum_type
def process_bind_param(self, value, dialect):
    '''
    Convert value into the plain integer that is bound to the statement.

    Accepts a member of self.enum_type whose value is an int, or a plain
    int that is a valid value of self.enum_type (an int that is not a valid
    value makes the enum lookup raise ValueError).

    Raises TypeError for every other kind of value. Previously a TypeError
    instance was *returned*, which passed the exception object on as the
    bind value instead of signalling the error.
    '''
    if isinstance(value, self.enum_type) and isinstance(value.value, int):
        return value.value
    if isinstance(value, int):
        # Round trip through the enum to validate the integer
        return self.enum_type(value).value
    raise TypeError(
        f'Expected {self.enum_type.__name__} or int, '
        f'got {type(value).__name__}'
    )
def process_result_value(self, value, dialect):
    ''' Turn the loaded value into the matching self.enum_type member. '''
    member = self.enum_type(value)
    return member
class ContainerColumn(db.TypeDecorator):
    '''
    String column type that stores a container as its JSON serialization.

    container_type is the Python type (e.g. list or dict) the column is
    expected to hold; it is checked when values are bound.
    '''
    impl = db.String

    def __init__(self, container_type, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.container_type = container_type

    def process_bind_param(self, value, dialect):
        '''
        Convert value into the JSON string that is bound to the statement.

        An instance of self.container_type is dumped to JSON. A str is
        passed through unchanged if it decodes to an instance of
        self.container_type (a str that is not valid JSON makes json.loads
        raise its decode error).

        Raises TypeError for every other value. Previously a TypeError
        instance was *returned*, which passed the exception object on as
        the bind value instead of signalling the error.
        '''
        if isinstance(value, self.container_type):
            return json.dumps(value)
        if (
            isinstance(value, str)
            and isinstance(json.loads(value), self.container_type)
        ):
            return value
        raise TypeError(
            f'Expected {self.container_type.__name__} or a JSON string '
            f'encoding one, got {type(value).__name__}'
        )

    def process_result_value(self, value, dialect):
        ''' Decode the stored JSON string. '''
        return json.loads(value)
# endregion type_decorators
##############################################################################
# Models #
##############################################################################
# region models
class Role(HashidMixin, db.Model): class Role(HashidMixin, db.Model):
__tablename__ = 'roles' __tablename__ = 'roles'
# Primary key # Primary key
@ -123,12 +212,6 @@ class Role(HashidMixin, db.Model):
db.session.commit() db.session.commit()
class UserSettingJobStatusMailNotificationLevel(IntEnum):
NONE = 1
END = 2
ALL = 3
class User(HashidMixin, UserMixin, db.Model): class User(HashidMixin, UserMixin, db.Model):
__tablename__ = 'users' __tablename__ = 'users'
# Primary key # Primary key
@ -449,7 +532,6 @@ class TranskribusHTRModel(HashidMixin, db.Model):
'user_id': self.user.hashid, 'user_id': self.user.hashid,
'shared': self.shared, 'shared': self.shared,
'transkribus_model_id': self.transkribus_model_id, 'transkribus_model_id': self.transkribus_model_id,
'transkribus_name': self.transkribus_name
} }
if backrefs: if backrefs:
dict_tesseract_ocr_model['user'] = \ dict_tesseract_ocr_model['user'] = \
@ -466,20 +548,14 @@ class TranskribusHTRModel(HashidMixin, db.Model):
# and 'creator' in m and m['creator'] == 'Transkribus Team' # and 'creator' in m and m['creator'] == 'Transkribus Team'
# and 'docType' in m and m['docType'] == 'handwritten' # and 'docType' in m and m['docType'] == 'handwritten'
# ] # ]
models = [ for m in TRANSKRIBUS_HTR_MODELS:
m for m in TRANSKRIBUS_HTR_MODELS
if m['modelId'] in [35909, 33744, 33597, 29820, 37789, 13685, 37855, 26124, 37738, 30919, 34763]
]
for m in models:
model = TranskribusHTRModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa model = TranskribusHTRModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa
if model is not None: if model is not None:
model.shared = True model.shared = True
model.transkribus_model_id = m['modelId'] model.transkribus_model_id = m['modelId']
model.transkribus_name = m['name']
continue continue
model = TranskribusHTRModel( model = TranskribusHTRModel(
shared=True, shared=True,
transkribus_name=m['name'],
transkribus_model_id=m['modelId'], transkribus_model_id=m['modelId'],
user=user, user=user,
) )
@ -605,17 +681,6 @@ class JobResult(FileMixin, HashidMixin, db.Model):
return self.job.user_id return self.job.user_id
class JobStatus(IntEnum):
INITIALIZING = 1
SUBMITTED = 2
QUEUED = 3
RUNNING = 4
CANCELING = 5
CANCELED = 6
COMPLETED = 7
FAILED = 8
class Job(HashidMixin, db.Model): class Job(HashidMixin, db.Model):
''' '''
Class to define Jobs. Class to define Jobs.
@ -828,18 +893,6 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
return dict_corpus_file return dict_corpus_file
class CorpusStatus(IntEnum):
UNPREPARED = 1
SUBMITTED = 2
QUEUED = 3
BUILDING = 4
BUILT = 5
FAILED = 6
STARTING_ANALYSIS_SESSION = 7
RUNNING_ANALYSIS_SESSION = 8
CANCELING_ANALYSIS_SESSION = 9
class Corpus(HashidMixin, db.Model): class Corpus(HashidMixin, db.Model):
''' '''
Class to define a corpus. Class to define a corpus.
@ -964,8 +1017,95 @@ class Corpus(HashidMixin, db.Model):
for x in self.files for x in self.files
} }
return dict_corpus return dict_corpus
# endregion models
##############################################################################
# event_handlers #
##############################################################################
# region event_handlers
@db.event.listens_for(Corpus, 'after_delete')
@db.event.listens_for(CorpusFile, 'after_delete')
@db.event.listens_for(Job, 'after_delete')
@db.event.listens_for(JobInput, 'after_delete')
@db.event.listens_for(JobResult, 'after_delete')
def ressource_after_delete(mapper, connection, ressource):
    '''
    Emit a 'users.patch' Socket.IO event holding a JSON Patch "remove"
    operation for the deleted ressource to the room of the user given by
    ressource.user_hashid.
    '''
    socketio.emit(
        'users.patch',
        [{'op': 'remove', 'path': ressource.jsonpatch_path}],
        room=f'users.{ressource.user_hashid}'
    )
@db.event.listens_for(Corpus, 'after_insert')
@db.event.listens_for(CorpusFile, 'after_insert')
@db.event.listens_for(Job, 'after_insert')
@db.event.listens_for(JobInput, 'after_insert')
@db.event.listens_for(JobResult, 'after_insert')
def ressource_after_insert_handler(mapper, connection, ressource):
    '''
    Emit a 'users.patch' Socket.IO event holding a JSON Patch "add"
    operation for the inserted ressource to the room of the user given by
    ressource.user_hashid.

    The value is the ressource's dict representation without backrefs and
    relationships; every relationship of the mapper is added as an empty
    dict.
    '''
    payload = ressource.to_dict(backrefs=False, relationships=False)
    for relationship in mapper.relationships:
        payload[relationship.key] = {}
    operation = {
        'op': 'add',
        'path': ressource.jsonpatch_path,
        'value': payload
    }
    socketio.emit(
        'users.patch',
        [operation],
        room=f'users.{ressource.user_hashid}'
    )
@db.event.listens_for(Corpus, 'after_update')
@db.event.listens_for(CorpusFile, 'after_update')
@db.event.listens_for(Job, 'after_update')
@db.event.listens_for(JobInput, 'after_update')
@db.event.listens_for(JobResult, 'after_update')
def ressource_after_update_handler(mapper, connection, ressource):
    '''
    Emit a 'users.patch' Socket.IO event holding one JSON Patch "replace"
    operation per changed non-relationship attribute of the updated
    ressource to the room of the user given by ressource.user_hashid.

    datetime values are sent as ISO format strings with a trailing 'Z',
    Enum members are sent by name. Nothing is emitted if no attribute
    changed.
    '''
    def _serialize(raw):
        if isinstance(raw, datetime):
            return raw.isoformat() + 'Z'
        if isinstance(raw, Enum):
            return raw.name
        return raw

    operations = []
    for state in db.inspect(ressource).attrs:
        if (
            state.key in mapper.relationships
            or not state.load_history().has_changes()
        ):
            continue
        serialized = _serialize(state.value)
        operations.append(
            {
                'op': 'replace',
                'path': f'{ressource.jsonpatch_path}/{state.key}',
                'value': serialized
            }
        )
    if not operations:
        return
    socketio.emit(
        'users.patch',
        operations,
        room=f'users.{ressource.user_hashid}'
    )
@db.event.listens_for(Job, 'after_update')
def job_after_update_handler(mapper, connection, job):
    '''
    Send a status notification mail when the status of a job changed.

    No mail is sent if the status attribute has no changes, if the
    user's notification level is NONE, or if the level is END and the
    new status is neither COMPLETED nor FAILED.
    '''
    for attr in db.inspect(job).attrs:
        if attr.key != 'status':
            continue
        # The 'after_update' event fires for any flushed change of the
        # job, so only notify when the status itself was modified.
        if not attr.load_history().has_changes():
            return
        level = job.user.setting_job_status_mail_notification_level
        if level == UserSettingJobStatusMailNotificationLevel.NONE:
            return
        if level == UserSettingJobStatusMailNotificationLevel.END:
            if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
                return
        msg = create_message(
            job.user.email,
            f'Status update for your Job "{job.title}"',
            'tasks/email/notification',
            job=job
        )
        mail.send(msg)
# endregion event_handlers
##############################################################################
# misc #
##############################################################################
# region misc
@login.user_loader @login.user_loader
def load_user(user_id): def load_user(user_id):
return User.query.get(int(user_id)) return User.query.get(int(user_id))
# endregion misc

View File

@ -144,7 +144,6 @@ def tesseract_ocr_pipeline():
x for x in TesseractOCRModel.query.filter().all() x for x in TesseractOCRModel.query.filter().all()
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
] ]
current_app.logger.warning(tesseract_ocr_models)
return render_template( return render_template(
'services/tesseract_ocr_pipeline.html.j2', 'services/tesseract_ocr_pipeline.html.j2',
form=form, form=form,

View File

@ -1,87 +0,0 @@
from app import db, mail, socketio
from app.email import create_message
from app.models import (
Corpus,
CorpusFile,
Job,
JobInput,
JobResult,
JobStatus,
UserSettingJobStatusMailNotificationLevel
)
from datetime import datetime
from enum import Enum
@db.event.listens_for(Corpus, 'after_delete')
@db.event.listens_for(CorpusFile, 'after_delete')
@db.event.listens_for(Job, 'after_delete')
@db.event.listens_for(JobInput, 'after_delete')
@db.event.listens_for(JobResult, 'after_delete')
def ressource_after_delete(mapper, connection, ressource):
    '''
    Emit a JSON Patch "remove" operation for a deleted ressource.

    The patch is sent as a "users.patch" Socket.IO event to the room
    "users.<user_hashid>" of the ressource.
    '''
    socketio.emit(
        'users.patch',
        [{'op': 'remove', 'path': ressource.jsonpatch_path}],
        room=f'users.{ressource.user_hashid}'
    )
@db.event.listens_for(Corpus, 'after_insert')
@db.event.listens_for(CorpusFile, 'after_insert')
@db.event.listens_for(Job, 'after_insert')
@db.event.listens_for(JobInput, 'after_insert')
@db.event.listens_for(JobResult, 'after_insert')
def ressource_after_insert_handler(mapper, connection, ressource):
    '''
    Emit a JSON Patch "add" operation for a newly inserted ressource.

    The added value is the ressource's dict representation without
    backrefs and relationships; every relationship of the mapper is
    represented by an empty object instead. The patch is sent as a
    "users.patch" Socket.IO event to the room "users.<user_hashid>".
    '''
    payload = ressource.to_dict(backrefs=False, relationships=False)
    # Relationships are not serialized, each one starts as an empty object
    payload.update({rel.key: {} for rel in mapper.relationships})
    socketio.emit(
        'users.patch',
        [{'op': 'add', 'path': ressource.jsonpatch_path, 'value': payload}],
        room=f'users.{ressource.user_hashid}'
    )
@db.event.listens_for(Corpus, 'after_update')
@db.event.listens_for(CorpusFile, 'after_update')
@db.event.listens_for(Job, 'after_update')
@db.event.listens_for(JobInput, 'after_update')
@db.event.listens_for(JobResult, 'after_update')
def ressource_after_update_handler(mapper, connection, ressource):
    '''
    Emit JSON Patch "replace" operations for changed column attributes.

    Relationship attributes and attributes without changes are skipped.
    Datetime values are sent as ISO 8601 strings with a "Z" suffix, enum
    values by their name. A changed "status" attribute of a Job also
    triggers the job status mail handler. Nothing is emitted if no
    attribute changed.
    '''
    operations = []
    for state in db.inspect(ressource).attrs:
        skip = (
            state.key in mapper.relationships
            or not state.load_history().has_changes()
        )
        if skip:
            continue
        current = state.value
        if isinstance(current, datetime):
            current = current.isoformat() + 'Z'
        elif isinstance(current, Enum):
            current = current.name
        operations.append(
            {
                'op': 'replace',
                'path': f'{ressource.jsonpatch_path}/{state.key}',
                'value': current
            }
        )
        if isinstance(ressource, Job) and state.key == 'status':
            _job_status_email_handler(ressource)
    if not operations:
        return
    socketio.emit(
        'users.patch',
        operations,
        room=f'users.{ressource.user_hashid}'
    )
def _job_status_email_handler(job):
    '''
    Send a status notification mail for a job to its user.

    No mail is sent if the user's notification level is NONE, or if the
    level is END and the job status is neither COMPLETED nor FAILED.
    '''
    level = job.user.setting_job_status_mail_notification_level
    if level == UserSettingJobStatusMailNotificationLevel.NONE:
        return
    if (
        level == UserSettingJobStatusMailNotificationLevel.END
        and job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]
    ):
        return
    mail.send(
        create_message(
            job.user.email,
            f'Status update for your Job "{job.title}"',
            'tasks/email/notification',
            job=job
        )
    )

View File

@ -1,43 +0,0 @@
from app import db
import json
class IntEnumColumn(db.TypeDecorator):
    '''
    Column type that stores members of an integer-valued enum as integers.
    '''
    impl = db.Integer

    def __init__(self, enum_type, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.enum_type = enum_type

    def process_bind_param(self, value, dialect):
        '''
        Convert an enum member or a plain int to the stored integer.

        Raises a TypeError for any other kind of value. An int that is
        not a valid value of the enum type is rejected by the enum type
        itself.
        '''
        if isinstance(value, self.enum_type) and isinstance(value.value, int):
            return value.value
        elif isinstance(value, int):
            # Round trip through the enum type to validate the integer
            return self.enum_type(value).value
        else:
            # Raise instead of returning the exception object, which
            # would otherwise be handed on as the bind value
            raise TypeError(
                f'Expected {self.enum_type.__name__} or int, '
                f'got {type(value).__name__}'
            )

    def process_result_value(self, value, dialect):
        '''
        Convert the stored integer back to a member of the enum type.
        '''
        return self.enum_type(value)
class ContainerColumn(db.TypeDecorator):
    '''
    Column type that stores a container as a JSON encoded string.
    '''
    impl = db.String

    def __init__(self, container_type, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.container_type = container_type

    def process_bind_param(self, value, dialect):
        '''
        Convert a container, or an already JSON encoded container, to the
        stored string.

        Raises a TypeError if the value is neither an instance of the
        container type nor a string that decodes to one.
        '''
        if isinstance(value, self.container_type):
            return json.dumps(value)
        elif (
            isinstance(value, str)
            and isinstance(json.loads(value), self.container_type)
        ):
            # Already serialized, store the string unchanged
            return value
        else:
            # Raise instead of returning the exception object, which
            # would otherwise be handed on as the bind value
            raise TypeError(
                f'Expected {self.container_type.__name__} or a JSON '
                f'string encoding one, got {type(value).__name__}'
            )

    def process_result_value(self, value, dialect):
        '''
        Decode the stored JSON string.
        '''
        return json.loads(value)

View File

@ -69,6 +69,14 @@ class JobDisplay extends RessourceDisplay {
element.classList.remove('hide'); element.classList.remove('hide');
} }
} }
elements = this.displayElement.querySelectorAll('.job-log-trigger');
for (element of elements) {
if (['COMPLETED', 'FAILED'].includes(status)) {
element.classList.remove('hide');
} else {
element.classList.add('hide');
}
}
elements = this.displayElement.querySelectorAll('.job-restart-trigger'); elements = this.displayElement.querySelectorAll('.job-restart-trigger');
for (element of elements) { for (element of elements) {
if (['COMPLETED', 'FAILED'].includes(status)) { if (['COMPLETED', 'FAILED'].includes(status)) {

9
app/tasks.py Normal file
View File

@ -0,0 +1,9 @@
from app.daemon import daemon
def register(app, scheduler):
    '''
    Register the periodic daemon task on the given scheduler.

    The task is only registered if the app is configured as the primary
    instance. It runs the daemon inside an application context every
    three seconds.
    '''
    if not app.config['NOPAQUE_IS_PRIMARY_INSTANCE']:
        return

    @scheduler.task('interval', id='daemon', seconds=3)
    def daemon_task():
        with app.app_context():
            daemon()

View File

@ -79,6 +79,7 @@
</div> </div>
<div class="card-action right-align"> <div class="card-action right-align">
{% if current_user.is_administrator() %} {% if current_user.is_administrator() %}
<a class="btn hide modal-trigger job-log-trigger waves-effect waves-light" data-target="job-log-modal"><i class="material-icons left">repeat</i>Log</a>
<a class="btn hide modal-trigger restart-job-trigger waves-effect waves-light" data-target="restart-job-modal"><i class="material-icons left">repeat</i>Restart</a> <a class="btn hide modal-trigger restart-job-trigger waves-effect waves-light" data-target="restart-job-modal"><i class="material-icons left">repeat</i>Restart</a>
{% endif %} {% endif %}
<!-- <a href="#" class="btn disabled waves-effect waves-light"><i class="material-icons left">settings</i>Export Parameters</a> --> <!-- <a href="#" class="btn disabled waves-effect waves-light"><i class="material-icons left">settings</i>Export Parameters</a> -->
@ -143,6 +144,16 @@
{% block modals %} {% block modals %}
{{ super() }} {{ super() }}
<div id="job-log-modal" class="modal">
<div class="modal-content">
<h4>Job logs</h4>
<iframe src="{{ url_for('jobs.job_log', job_id=job.id) }}" style="border: 0; width: 100%; height: 450px;"></iframe>
</div>
<div class="modal-footer">
<a href="#!" class="btn modal-close waves-effect waves-light">Close</a>
</div>
</div>
<div id="delete-job-modal" class="modal"> <div id="delete-job-modal" class="modal">
<div class="modal-content"> <div class="modal-content">
<h4>Confirm deletion</h4> <h4>Confirm deletion</h4>

33
boot.sh
View File

@ -1,28 +1,37 @@
#!/bin/bash #!/bin/bash
source venv/bin/activate source venv/bin/activate
# Print the usage text of this wrapper script to stdout.
display_help() {
    local script_name
    script_name="$(basename "${0}")"
    cat <<EOF

Usage: ${script_name} [COMMAND]

Run wrapper for a nopaque instance

Commands:
 flask A general utility script for Flask applications.

Run '${script_name} COMMAND --help' for more information on a command.
EOF
}
if [[ "${#}" -eq 0 ]]; then if [[ "${#}" -eq 0 ]]; then
if [[ "${NOPAQUE_IS_PRIMARY_INSTANCE:-True}" == "True" ]]; then if [[ "${NOPAQUE_IS_PRIMARY_INSTANCE:-True}" == "True" ]]; then
while true; do while true; do
echo "INFO Run deployment tasks..."
flask deploy flask deploy
if [[ "${?}" == "0" ]]; then if [[ "${?}" == "0" ]]; then
break break
fi fi
echo "WARNING ...Failed, retrying in 5 secs..." echo "Deploy command failed, retrying in 5 secs..."
sleep 5 sleep 5
done done
echo "INFO Start nopaque daemon..."
flask daemon run &
fi fi
echo "INFO Start nopaque..."
python nopaque.py python nopaque.py
elif [[ "${1}" == "flask" ]]; then elif [[ "${1}" == "flask" ]]; then
exec ${@:1} flask "${@:2}"
elif [[ "${1}" == "--help" || "${1}" == "-h" ]]; then
display_help
else else
echo "${0} [COMMAND]" display_help
echo "" exit 1
echo "nopaque startup script"
echo ""
echo "Commands:"
echo " flask"
fi fi

View File

@ -3,7 +3,7 @@
import eventlet import eventlet
eventlet.monkey_patch() eventlet.monkey_patch()
from app import db, cli, create_app, socketio # noqa from app import cli, create_app, db, scheduler, socketio # noqa
from app.models import ( from app.models import (
Corpus, Corpus,
CorpusFile, CorpusFile,
@ -49,6 +49,12 @@ def make_shell_context() -> Dict[str, Any]:
def main(): def main():
with app.app_context():
if app.config['NOPAQUE_IS_PRIMARY_INSTANCE']:
for corpus in Corpus.query.filter(Corpus.num_analysis_sessions > 0).all():
corpus.num_analysis_sessions = 0
db.session.commit()
scheduler.start()
socketio.run(app, host='0.0.0.0') socketio.run(app, host='0.0.0.0')