Restructure code and use APScheduler for daemon functionality

This commit is contained in:
Patrick Jentsch 2022-06-28 12:30:02 +02:00
parent 7c52d3f392
commit b8bf004684
20 changed files with 755 additions and 710 deletions
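At a glance, the commit replaces the separate `flask daemon run` process (a `while True` loop with `sleep(1.5)`) with a flask-apscheduler interval job registered on the web application itself. A minimal, self-contained sketch of that pattern, condensed from the hunks below (the job name `tick` and the factory wiring are illustrative, not the commit's exact code):

from flask import Flask
from flask_apscheduler import APScheduler

scheduler = APScheduler()

def create_app() -> Flask:
    app = Flask(__name__)
    scheduler.init_app(app)

    # flask-apscheduler wraps APScheduler's add_job(); the decorated
    # function runs every three seconds once the scheduler is started.
    @scheduler.task('interval', id='tick', seconds=3)
    def tick():
        # Scheduled jobs run outside a request, so push an application
        # context before using app.config or the database session.
        with app.app_context():
            pass  # the commit calls daemon() here: check_corpora() + check_jobs()

    return app

app = create_app()
scheduler.start()  # in the commit this happens in nopaque.py's main()

In the commit the registration lives in the new app/tasks.py and the scheduler is started in nopaque.py, so the daemon no longer needs its own process managed by boot.sh.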

View File

@@ -1,4 +1,5 @@
 from config import Config
+from docker import DockerClient
 from flask import Flask
 from flask_apscheduler import APScheduler
 from flask_assets import Environment
@@ -11,18 +12,19 @@ from flask_sqlalchemy import SQLAlchemy
 from flask_hashids import Hashids
 
-assets: Environment = Environment()
-db: SQLAlchemy = SQLAlchemy()
-hashids: Hashids = Hashids()
-login: LoginManager = LoginManager()
+assets = Environment()
+db = SQLAlchemy()
+docker_client = DockerClient()
+hashids = Hashids()
+login = LoginManager()
 login.login_view = 'auth.login'
 login.login_message = 'Please log in to access this page.'
-mail: Mail = Mail()
-migrate: Migrate = Migrate()
-paranoid: Paranoid = Paranoid()
+mail = Mail()
+migrate = Migrate()
+paranoid = Paranoid()
 paranoid.redirect_view = '/'
-scheduler: APScheduler = APScheduler()  # TODO: Use this!
-socketio: SocketIO = SocketIO()
+scheduler = APScheduler()
+socketio = SocketIO()
 
 
 def create_app(config: Config = Config) -> Flask:
@@ -30,6 +32,11 @@ def create_app(config: Config = Config) -> Flask:
     app: Flask = Flask(__name__)
     app.config.from_object(config)
     config.init_app(app)
+    docker_client.login(
+        username=app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
+        password=app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
+        registry=app.config['NOPAQUE_DOCKER_REGISTRY']
+    )
     assets.init_app(app)
     db.init_app(app)
@@ -38,10 +45,11 @@ def create_app(config: Config = Config) -> Flask:
     mail.init_app(app)
     migrate.init_app(app, db)
     paranoid.init_app(app)
+    scheduler.init_app(app)
     socketio.init_app(app, message_queue=app.config['NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI'])  # noqa
 
-    from app import socketio_event_listeners
-    from app import sqlalchemy_event_listeners
+    from . import tasks
+    tasks.register(app, scheduler)
 
     from .admin import bp as admin_blueprint
     app.register_blueprint(admin_blueprint, url_prefix='/admin')

View File

@@ -1,7 +1,6 @@
+from app.models import Role, User, TesseractOCRModel, TranskribusHTRModel
 from flask import current_app
 from flask_migrate import upgrade
-from . import db
-from .models import Corpus, Role, User, TesseractOCRModel, TranskribusHTRModel
 import click
 import os
@@ -41,22 +40,6 @@ def register(app):
         current_app.logger.info('Insert/Update default TranskribusHTRModels')
         TranskribusHTRModel.insert_defaults()
 
-    @app.cli.group()
-    def daemon():
-        ''' Daemon commands. '''
-        pass
-
-    @daemon.command('run')
-    def run_daemon():
-        ''' Run daemon '''
-        corpus: Corpus
-        for corpus in Corpus.query.filter(Corpus.num_analysis_sessions > 0):
-            corpus.num_analysis_sessions = 0
-        db.session.commit()
-        from app.daemon import Daemon
-        daemon: Daemon = Daemon()
-        daemon.run()
-
     @app.cli.group()
     def converter():
         ''' Converter commands. '''

View File

@@ -1,23 +1,9 @@
 from app import db
-from flask import current_app
-from time import sleep
-from .corpus_utils import CheckCorporaMixin
-from .job_utils import CheckJobsMixin
-import docker
+from .corpus_utils import check_corpora
+from .job_utils import check_jobs
 
 
-class Daemon(CheckCorporaMixin, CheckJobsMixin):
-    def __init__(self):
-        self.docker = docker.from_env()
-        self.docker.login(
-            username=current_app.config['NOPAQUE_DOCKER_REGISTRY_USERNAME'],
-            password=current_app.config['NOPAQUE_DOCKER_REGISTRY_PASSWORD'],
-            registry=current_app.config['NOPAQUE_DOCKER_REGISTRY']
-        )
-
-    def run(self):
-        while True:
-            self.check_corpora()
-            self.check_jobs()
-            db.session.commit()
-            sleep(1.5)
+def daemon():
+    check_corpora()
+    check_jobs()
+    db.session.commit()
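With the loop and the Docker login moved out, `daemon()` is now a single pass that can run anywhere an application context is active, for example as a one-off from `flask shell`. A usage sketch under that assumption (not part of the commit):

# Assumes the refactored modules from this commit.
from app import create_app
from app.daemon import daemon

app = create_app()  # performs the Docker registry login
with app.app_context():
    daemon()  # one check_corpora() + check_jobs() pass, then db.session.commit()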

View File

@@ -1,3 +1,4 @@
+from app import docker_client
 from app.models import Corpus, CorpusStatus
 from flask import current_app
 import docker
@@ -5,250 +6,249 @@ import os
 import shutil
 
 
-class CheckCorporaMixin:
-    def check_corpora(self):
-        corpora = Corpus.query.all()
-        for corpus in (x for x in corpora if x.status == CorpusStatus.SUBMITTED):  # noqa
-            self.create_build_corpus_service(corpus)
-        for corpus in (x for x in corpora if x.status == CorpusStatus.QUEUED or x.status == CorpusStatus.BUILDING):  # noqa
-            self.checkout_build_corpus_service(corpus)
-        for corpus in (x for x in corpora if x.status == CorpusStatus.BUILT and x.num_analysis_sessions > 0):  # noqa
-            corpus.status = CorpusStatus.STARTING_ANALYSIS_SESSION
-        for corpus in (x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION and x.num_analysis_sessions == 0):  # noqa
-            corpus.status = CorpusStatus.CANCELING_ANALYSIS_SESSION
-        for corpus in (x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION):  # noqa
-            self.checkout_analysing_corpus_container(corpus)
-        for corpus in (x for x in corpora if x.status == CorpusStatus.STARTING_ANALYSIS_SESSION):  # noqa
-            self.create_cqpserver_container(corpus)
-        for corpus in (x for x in corpora if x.status == CorpusStatus.CANCELING_ANALYSIS_SESSION):  # noqa
-            self.remove_cqpserver_container(corpus)
+def check_corpora():
+    corpora = Corpus.query.all()
+    for corpus in [x for x in corpora if x.status == CorpusStatus.SUBMITTED]:
+        _create_build_corpus_service(corpus)
+    for corpus in [x for x in corpora if x.status in [CorpusStatus.QUEUED, CorpusStatus.BUILDING]]:
+        _checkout_build_corpus_service(corpus)
+    for corpus in [x for x in corpora if x.status == CorpusStatus.BUILT and x.num_analysis_sessions > 0]:
+        corpus.status = CorpusStatus.STARTING_ANALYSIS_SESSION
+    for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION and x.num_analysis_sessions == 0]:
+        corpus.status = CorpusStatus.CANCELING_ANALYSIS_SESSION
+    for corpus in [x for x in corpora if x.status == CorpusStatus.RUNNING_ANALYSIS_SESSION]:
+        _checkout_analysing_corpus_container(corpus)
+    for corpus in [x for x in corpora if x.status == CorpusStatus.STARTING_ANALYSIS_SESSION]:
+        _create_cqpserver_container(corpus)
+    for corpus in [x for x in corpora if x.status == CorpusStatus.CANCELING_ANALYSIS_SESSION]:
+        _remove_cqpserver_container(corpus)
 
 
-    def create_build_corpus_service(self, corpus):
+def _create_build_corpus_service(corpus):
     ''' # Docker service settings # '''
     ''' ## Command ## '''
     command = ['bash', '-c']
     command.append(
         f'mkdir /corpora/data/nopaque_{corpus.id}'
         ' && '
         'cwb-encode'
         ' -c utf8'
         f' -d /corpora/data/nopaque_{corpus.id}'
         ' -f /root/files/corpus.vrt'
         f' -R /usr/local/share/cwb/registry/nopaque_{corpus.id}'
         ' -P pos -P lemma -P simple_pos'
         ' -S ent:0+type -S s:0'
-        ' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title'  # noqa
+        ' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title'
         ' -xsB -9'
         ' && '
         f'cwb-make -V NOPAQUE_{corpus.id}'
     )
     ''' ## Constraints ## '''
     constraints = ['node.role==worker']
     ''' ## Image ## '''
     image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1702'
     ''' ## Labels ## '''
     labels = {
         'origin': current_app.config['SERVER_NAME'],
         'type': 'corpus.build',
         'corpus_id': str(corpus.id)
     }
     ''' ## Mounts ## '''
     mounts = []
     ''' ### Data mount ### '''
     data_mount_source = os.path.join(corpus.path, 'cwb', 'data')
     data_mount_target = '/corpora/data'
     data_mount = f'{data_mount_source}:{data_mount_target}:rw'
     # Make sure that there is no data in the data directory
     shutil.rmtree(data_mount_source, ignore_errors=True)
     os.makedirs(data_mount_source)
     mounts.append(data_mount)
     ''' ### File mount ### '''
     file_mount_source = os.path.join(corpus.path, 'cwb', 'corpus.vrt')
     file_mount_target = '/root/files/corpus.vrt'
     file_mount = f'{file_mount_source}:{file_mount_target}:ro'
     mounts.append(file_mount)
     ''' ### Registry mount ### '''
     registry_mount_source = os.path.join(corpus.path, 'cwb', 'registry')
     registry_mount_target = '/usr/local/share/cwb/registry'
     registry_mount = f'{registry_mount_source}:{registry_mount_target}:rw'
     # Make sure that there is no data in the registry directory
     shutil.rmtree(registry_mount_source, ignore_errors=True)
     os.makedirs(registry_mount_source)
     mounts.append(registry_mount)
     ''' ## Name ## '''
     name = f'build-corpus_{corpus.id}'
     ''' ## Restart policy ## '''
     restart_policy = docker.types.RestartPolicy()
     try:
-        self.docker.services.create(
+        docker_client.services.create(
             image,
             command=command,
             constraints=constraints,
             labels=labels,
             mounts=mounts,
             name=name,
             restart_policy=restart_policy,
             user='0:0'
         )
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Create service "{name}" failed '
             f'due to "docker.errors.APIError": {e}'
         )
         return
     corpus.status = CorpusStatus.QUEUED
 
 
-    def checkout_build_corpus_service(self, corpus):
+def _checkout_build_corpus_service(corpus):
     service_name = f'build-corpus_{corpus.id}'
     try:
-        service = self.docker.services.get(service_name)
+        service = docker_client.services.get(service_name)
     except docker.errors.NotFound as e:
         current_app.logger.error(
             f'Get service "{service_name}" failed '
             f'due to "docker.errors.NotFound": {e}'
         )
         corpus.status = CorpusStatus.FAILED
         return
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Get service "{service_name}" failed '
             f'due to "docker.errors.APIError": {e}'
         )
     service_tasks = service.tasks()
     if not service_tasks:
         return
     task_state = service_tasks[0].get('Status').get('State')
-    if corpus.status == CorpusStatus.QUEUED and task_state != 'pending':  # noqa
+    if corpus.status == CorpusStatus.QUEUED and task_state != 'pending':
         corpus.status = CorpusStatus.BUILDING
         return
-    elif corpus.status == CorpusStatus.BUILDING and task_state == 'complete':  # noqa
+    elif corpus.status == CorpusStatus.BUILDING and task_state == 'complete':
         corpus.status = CorpusStatus.BUILT
-    elif corpus.status == CorpusStatus.BUILDING and task_state == 'failed':  # noqa
+    elif corpus.status == CorpusStatus.BUILDING and task_state == 'failed':
         corpus.status = CorpusStatus.FAILED
     else:
         return
     try:
         service.remove()
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Remove service "{service_name}" failed '
             f'due to "docker.errors.APIError": {e}'
         )
 
 
-    def create_cqpserver_container(self, corpus):
+def _create_cqpserver_container(corpus):
     ''' # Docker container settings # '''
     ''' ## Command ## '''
     command = []
     command.append(
         'echo "host *;" > cqpserver.init'
         ' && '
         'echo "user anonymous \\"\\";" >> cqpserver.init'
         ' && '
         'cqpserver -I cqpserver.init'
     )
     ''' ## Detach ## '''
     detach = True
     ''' ## Entrypoint ## '''
     entrypoint = ['bash', '-c']
     ''' ## Image ## '''
     image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1702'
     ''' ## Name ## '''
     name = f'cqpserver_{corpus.id}'
     ''' ## Network ## '''
     network = 'nopaque_default'
     ''' ## Volumes ## '''
     volumes = []
     ''' ### Corpus data volume ### '''
     data_volume_source = os.path.join(corpus.path, 'cwb', 'data')
     data_volume_target = '/corpora/data'
     data_volume = f'{data_volume_source}:{data_volume_target}:rw'
     volumes.append(data_volume)
     ''' ### Corpus registry volume ### '''
     registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry')
     registry_volume_target = '/usr/local/share/cwb/registry'
-    registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw'  # noqa
+    registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw'
     volumes.append(registry_volume)
     # Check if a cqpserver container already exists. If this is the case,
     # remove it and create a new one
     try:
-        container = self.docker.containers.get(name)
+        container = docker_client.containers.get(name)
     except docker.errors.NotFound:
         pass
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Get container "{name}" failed '
             f'due to "docker.errors.APIError": {e}'
         )
         return
     else:
         try:
             container.remove(force=True)
         except docker.errors.APIError as e:
             current_app.logger.error(
                 f'Remove container "{name}" failed '
                 f'due to "docker.errors.APIError": {e}'
             )
             return
     try:
-        self.docker.containers.run(
+        docker_client.containers.run(
             image,
             command=command,
             detach=detach,
             entrypoint=entrypoint,
             name=name,
             network=network,
             user='0:0',
             volumes=volumes
         )
     except docker.errors.ImageNotFound as e:
         current_app.logger.error(
             f'Run container "{name}" failed '
             f'due to "docker.errors.ImageNotFound" error: {e}'
         )
         corpus.status = CorpusStatus.FAILED
         return
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Run container "{name}" failed '
             f'due to "docker.errors.APIError" error: {e}'
         )
         return
     corpus.status = CorpusStatus.RUNNING_ANALYSIS_SESSION
 
 
-    def checkout_analysing_corpus_container(self, corpus):
+def _checkout_analysing_corpus_container(corpus):
     container_name = f'cqpserver_{corpus.id}'
     try:
-        self.docker.containers.get(container_name)
+        docker_client.containers.get(container_name)
     except docker.errors.NotFound as e:
         current_app.logger.error(
             f'Get container "{container_name}" failed '
             f'due to "docker.errors.NotFound": {e}'
         )
         corpus.num_analysis_sessions = 0
         corpus.status = CorpusStatus.BUILT
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Get container "{container_name}" failed '
             f'due to "docker.errors.APIError": {e}'
         )
 
 
-    def remove_cqpserver_container(self, corpus):
+def _remove_cqpserver_container(corpus):
     container_name = f'cqpserver_{corpus.id}'
     try:
-        container = self.docker.containers.get(container_name)
+        container = docker_client.containers.get(container_name)
     except docker.errors.NotFound:
         corpus.status = CorpusStatus.BUILT
         return
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Get container "{container_name}" failed '
             f'due to "docker.errors.APIError": {e}'
         )
         return
     try:
         container.remove(force=True)
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Remove container "{container_name}" failed '
             f'due to "docker.errors.APIError": {e}'
         )

View File

@@ -1,4 +1,4 @@
-from app import db
+from app import db, docker_client
 from app.models import (
     Job,
     JobResult,
@@ -15,217 +15,216 @@ import os
 import shutil
 
 
-class CheckJobsMixin:
-    def check_jobs(self):
-        jobs = Job.query.all()
-        for job in (x for x in jobs if x.status == JobStatus.SUBMITTED):
-            self.create_job_service(job)
-        for job in (x for x in jobs if x.status in [JobStatus.QUEUED, JobStatus.RUNNING]):  # noqa
-            self.checkout_job_service(job)
-        for job in (x for x in jobs if x.status == JobStatus.CANCELING):
-            self.remove_job_service(job)
+def check_jobs():
+    jobs = Job.query.all()
+    for job in [x for x in jobs if x.status == JobStatus.SUBMITTED]:
+        _create_job_service(job)
+    for job in [x for x in jobs if x.status in [JobStatus.QUEUED, JobStatus.RUNNING]]:
+        _checkout_job_service(job)
+    for job in [x for x in jobs if x.status == JobStatus.CANCELING]:
+        _remove_job_service(job)
 
 
-    def create_job_service(self, job):
+def _create_job_service(job):
     ''' # Docker service settings # '''
     ''' ## Service specific settings ## '''
     if job.service == 'file-setup-pipeline':
         mem_mb = 512
         n_cores = 2
         executable = 'file-setup-pipeline'
-        image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}file-setup-pipeline:v{job.service_version}'  # noqa
+        image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}file-setup-pipeline:v{job.service_version}'
     elif job.service == 'tesseract-ocr-pipeline':
         mem_mb = 1024
         n_cores = 4
         executable = 'tesseract-ocr-pipeline'
-        image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}tesseract-ocr-pipeline:v{job.service_version}'  # noqa
+        image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}tesseract-ocr-pipeline:v{job.service_version}'
     elif job.service == 'transkribus-htr-pipeline':
         mem_mb = 1024
         n_cores = 4
         executable = 'transkribus-htr-pipeline'
-        image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}transkribus-htr-pipeline:v{job.service_version}'  # noqa
+        image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}transkribus-htr-pipeline:v{job.service_version}'
     elif job.service == 'spacy-nlp-pipeline':
         mem_mb = 1024
         n_cores = 1
         executable = 'spacy-nlp-pipeline'
-        image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}spacy-nlp-pipeline:v{job.service_version}'  # noqa
+        image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}spacy-nlp-pipeline:v{job.service_version}'
     ''' ## Command ## '''
     command = f'{executable} -i /input -o /output'
     command += ' --log-dir /logs'
     command += f' --mem-mb {mem_mb}'
     command += f' --n-cores {n_cores}'
     if job.service == 'spacy-nlp-pipeline':
         command += f' -m {job.service_args["model"]}'
-        if 'encoding_detection' in job.service_args and job.service_args['encoding_detection']:  # noqa
+        if 'encoding_detection' in job.service_args and job.service_args['encoding_detection']:
             command += ' --check-encoding'
     elif job.service == 'tesseract-ocr-pipeline':
         command += f' -m {job.service_args["model"]}'
         if 'binarization' in job.service_args and job.service_args['binarization']:
             command += ' --binarize'
     elif job.service == 'transkribus-htr-pipeline':
         transkribus_htr_model = TranskribusHTRModel.query.get(job.service_args['model'])
         command += f' -m {transkribus_htr_model.transkribus_model_id}'
         readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME')
         command += f' --readcoop-username "{readcoop_username}"'
         readcoop_password = current_app.config.get('NOPAQUE_READCOOP_PASSWORD')
         command += f' --readcoop-password "{readcoop_password}"'
         if 'binarization' in job.service_args and job.service_args['binarization']:
             command += ' --binarize'
     ''' ## Constraints ## '''
     constraints = ['node.role==worker']
     ''' ## Labels ## '''
     labels = {
         'origin': current_app.config['SERVER_NAME'],
         'type': 'job',
         'job_id': str(job.id)
     }
     ''' ## Mounts ## '''
     mounts = []
     ''' ### Input mount(s) ### '''
     input_mount_target_base = '/input'
     if job.service == 'file-setup-pipeline':
         input_mount_target_base += f'/{secure_filename(job.title)}'
     for job_input in job.inputs:
         input_mount_source = job_input.path
-        input_mount_target = f'{input_mount_target_base}/{job_input.filename}'  # noqa
+        input_mount_target = f'{input_mount_target_base}/{job_input.filename}'
         input_mount = f'{input_mount_source}:{input_mount_target}:ro'
         mounts.append(input_mount)
     if job.service == 'tesseract-ocr-pipeline':
         model = TesseractOCRModel.query.get(job.service_args['model'])
         if model is None:
             job.status = JobStatus.FAILED
             return
         models_mount_source = model.path
         models_mount_target = f'/usr/local/share/tessdata/{model.filename}'
         models_mount = f'{models_mount_source}:{models_mount_target}:ro'
         mounts.append(models_mount)
     ''' ### Output mount ### '''
     output_mount_source = os.path.join(job.path, 'results')
     output_mount_target = '/output'
     output_mount = f'{output_mount_source}:{output_mount_target}:rw'
     # Make sure that there is no data in the output directory
     shutil.rmtree(output_mount_source, ignore_errors=True)
     os.makedirs(output_mount_source)
     mounts.append(output_mount)
     ''' ### Pipeline data mount ### '''
     pyflow_data_mount_source = os.path.join(job.path, 'pipeline_data')
     pyflow_data_mount_target = '/logs/pyflow.data'
-    pyflow_data_mount = f'{pyflow_data_mount_source}:{pyflow_data_mount_target}:rw'  # noqa
+    pyflow_data_mount = f'{pyflow_data_mount_source}:{pyflow_data_mount_target}:rw'
     # Make sure that there is no data in the output directory
     shutil.rmtree(pyflow_data_mount_source, ignore_errors=True)
     os.makedirs(pyflow_data_mount_source)
     mounts.append(pyflow_data_mount)
     ''' ## Name ## '''
     name = f'job_{job.id}'
     ''' ## Resources ## '''
     resources = docker.types.Resources(
         cpu_reservation=n_cores * (10 ** 9),
         mem_reservation=mem_mb * (10 ** 6)
     )
     ''' ## Restart policy ## '''
     restart_policy = docker.types.RestartPolicy()
     try:
-        self.docker.services.create(
+        docker_client.services.create(
             image,
             command=command,
             constraints=constraints,
             labels=labels,
             mounts=mounts,
             name=name,
             resources=resources,
             restart_policy=restart_policy,
             user='0:0'
         )
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Create service "{name}" failed '
             f'due to "docker.errors.APIError": {e}'
         )
         return
     job.status = JobStatus.QUEUED
 
 
-    def checkout_job_service(self, job):
+def _checkout_job_service(job):
     service_name = f'job_{job.id}'
     try:
-        service = self.docker.services.get(service_name)
+        service = docker_client.services.get(service_name)
     except docker.errors.NotFound as e:
         current_app.logger.error(
             f'Get service "{service_name}" failed '
             f'due to "docker.errors.NotFound": {e}'
         )
         job.status = JobStatus.FAILED
         return
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Get service "{service_name}" failed '
             f'due to "docker.errors.APIError": {e}'
         )
         return
     service_tasks = service.tasks()
     if not service_tasks:
         return
     task_state = service_tasks[0].get('Status').get('State')
     if job.status == JobStatus.QUEUED and task_state != 'pending':
         job.status = JobStatus.RUNNING
         return
-    elif job.status == JobStatus.RUNNING and task_state == 'complete':  # noqa
+    elif job.status == JobStatus.RUNNING and task_state == 'complete':
         job.status = JobStatus.COMPLETED
         results_dir = os.path.join(job.path, 'results')
         with open(os.path.join(results_dir, 'outputs.json')) as f:
             outputs = json.load(f)
         for output in outputs:
             filename = os.path.basename(output['file'])
             job_result = JobResult(
                 filename=filename,
                 job=job,
                 mimetype=output['mimetype']
             )
             if 'description' in output:
                 job_result.description = output['description']
             db.session.add(job_result)
             db.session.flush(objects=[job_result])
             db.session.refresh(job_result)
             os.rename(
                 os.path.join(results_dir, output['file']),
                 job_result.path
             )
     elif job.status == JobStatus.RUNNING and task_state == 'failed':
         job.status = JobStatus.FAILED
     else:
         return
     job.end_date = datetime.utcnow()
     try:
         service.remove()
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Remove service "{service_name}" failed '
             f'due to "docker.errors.APIError": {e}'
         )
 
 
-    def remove_job_service(self, job):
+def _remove_job_service(job):
     service_name = f'job_{job.id}'
     try:
-        service = self.docker.services.get(service_name)
+        service = docker_client.services.get(service_name)
     except docker.errors.NotFound:
         job.status = JobStatus.CANCELED
         return
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Get service "{service_name}" failed '
             f'due to "docker.errors.APIError": {e}'
         )
         return
     try:
         service.update(mounts=None)
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Update service "{service_name}" failed '
             f'due to "docker.errors.APIError": {e}'
         )
         return
     try:
         service.remove()
     except docker.errors.APIError as e:
         current_app.logger.error(
             f'Remove "{service_name}" service failed '
             f'due to "docker.errors.APIError": {e}'
         )

View File

@@ -1,8 +1,8 @@
+from app.models import Permission
 from flask import abort, current_app
 from flask_login import current_user
 from functools import wraps
 from threading import Thread
-from .models import Permission
 
 
 def permission_required(permission):

View File

@@ -1,27 +1,27 @@
-from flask import current_app, render_template
+from app import mail
+from flask import current_app, Flask, render_template
 from flask_mail import Message
-from typing import Any, Text
-from . import mail
-from .decorators import background
+from threading import Thread
+from typing import Any
 
 
-def create_message(
-    recipient: str,
-    subject: str,
-    template: str,
-    **kwargs: Any
-) -> Message:
+def create_message(recipient: str, subject: str, template: str, **kwargs: Any) -> Message:
     subject_prefix: str = current_app.config['NOPAQUE_MAIL_SUBJECT_PREFIX']
     msg: Message = Message(
-        f'{subject_prefix} {subject}',
-        recipients=[recipient]
+        body=render_template(f'{template}.txt.j2', **kwargs),
+        html=render_template(f'{template}.html.j2', **kwargs),
+        recipients=[recipient],
+        subject=f'{subject_prefix} {subject}'
     )
-    msg.body = render_template(f'{template}.txt.j2', **kwargs)
-    msg.html = render_template(f'{template}.html.j2', **kwargs)
     return msg
 
 
-@background
-def send(msg: Message, *args, **kwargs):
-    with kwargs['app'].app_context():
+def _send(app: Flask, msg):
+    with app.app_context():
         mail.send(msg)
+
+
+def send(msg: Message, *args, **kwargs):
+    thread = Thread(target=_send, args=[current_app._get_current_object(), msg])
+    thread.start()
+    return thread
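The rewritten `send()` keeps the non-blocking behaviour of the old `@background` decorator but makes the mechanics explicit: `current_app` is only a context-bound proxy, so the real application object (`current_app._get_current_object()`) is handed to the worker thread, which then pushes its own application context before calling `mail.send()`. A call-site sketch, with a made-up recipient and template name:

# Inside a view function or any other active app/request context:
msg = create_message('user@example.com', 'Welcome!', 'main/email/welcome', user=user)
thread = send(msg)  # returns immediately; the Thread can be join()ed in tests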

View File

@@ -58,6 +58,23 @@ def download_job_input(job_id, job_input_id):
     )
 
 
+@bp.route('/<hashid:job_id>/log')
+@login_required
+@admin_required
+def job_log(job_id):
+    job = Job.query.get_or_404(job_id)
+    if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
+        flash(
+            f'Can\'t restart job "{job.title}": Status is not "Completed/Failed"',  # noqa
+            category='error'
+        )
+    return send_from_directory(
+        attachment_filename=f'job_{job.hashid}_log.txt',
+        directory=os.path.join(job.path, 'pipeline_data'),
+        filename=os.path.join('logs', 'pyflow_log.txt')
+    )
+
+
 @bp.route('/<hashid:job_id>/restart')
 @login_required
 @admin_required

View File

@@ -2,4 +2,4 @@ from flask import Blueprint
 
 bp = Blueprint('main', __name__)
 
-from . import routes
+from . import events, routes

View File

@@ -1,8 +1,8 @@
 from app import hashids, socketio
-from app.decorators import socketio_login_required
-from app.models import TesseractOCRModel, TranskribusHTRModel, User
+from app.models import User
 from flask_login import current_user
 from flask_socketio import join_room
+from app.decorators import socketio_login_required
 
 
 @socketio.on('users.user.get')

View File

@@ -1,8 +1,8 @@
-from app import db, login
+from app import db, login, mail, socketio
 from app.converters.vrt import normalize_vrt_file
-from app.sqlalchemy_type_decorators import ContainerColumn, IntEnumColumn
+from app.email import create_message
 from datetime import datetime, timedelta
-from enum import IntEnum
+from enum import Enum, IntEnum
 from flask import current_app, url_for
 from flask_hashids import HashidMixin
 from flask_login import UserMixin
@@ -20,9 +20,57 @@ import yaml
 
 TRANSKRIBUS_HTR_MODELS = \
-    json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text').content)['trpModelMetadata']  # noqa
+    json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text', params={'docType': 'handwritten'}).content)['trpModelMetadata']  # noqa
 
+
+##############################################################################
+# enums                                                                      #
+##############################################################################
+# region enums
+class CorpusStatus(IntEnum):
+    UNPREPARED = 1
+    SUBMITTED = 2
+    QUEUED = 3
+    BUILDING = 4
+    BUILT = 5
+    FAILED = 6
+    STARTING_ANALYSIS_SESSION = 7
+    RUNNING_ANALYSIS_SESSION = 8
+    CANCELING_ANALYSIS_SESSION = 9
+
+
+class JobStatus(IntEnum):
+    INITIALIZING = 1
+    SUBMITTED = 2
+    QUEUED = 3
+    RUNNING = 4
+    CANCELING = 5
+    CANCELED = 6
+    COMPLETED = 7
+    FAILED = 8
+
+
+class Permission(IntEnum):
+    '''
+    Defines User permissions as integers by the power of 2. User permission
+    can be evaluated using the bitwise operator &.
+    '''
+    ADMINISTRATE = 1
+    CONTRIBUTE = 2
+    USE_API = 4
+
+
+class UserSettingJobStatusMailNotificationLevel(IntEnum):
+    NONE = 1
+    END = 2
+    ALL = 3
+# endregion enums
+
+
+##############################################################################
+# mixins                                                                     #
+##############################################################################
+# region mixins
 class FileMixin:
     '''
     Mixin for db.Model classes. All file related models should use this.
@@ -39,18 +87,59 @@ class FileMixin:
             'last_edited_date': self.last_edited_date.isoformat() + 'Z',
             'mimetype': self.mimetype
         }
+# endregion mixins
 
 
-class Permission(IntEnum):
-    '''
-    Defines User permissions as integers by the power of 2. User permission
-    can be evaluated using the bitwise operator &.
-    '''
-    ADMINISTRATE = 1
-    CONTRIBUTE = 2
-    USE_API = 4
+##############################################################################
+# type_decorators                                                            #
+##############################################################################
+# region type_decorators
+class IntEnumColumn(db.TypeDecorator):
+    impl = db.Integer
+
+    def __init__(self, enum_type, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.enum_type = enum_type
+
+    def process_bind_param(self, value, dialect):
+        if isinstance(value, self.enum_type) and isinstance(value.value, int):
+            return value.value
+        elif isinstance(value, int):
+            return self.enum_type(value).value
+        else:
+            return TypeError()
+
+    def process_result_value(self, value, dialect):
+        return self.enum_type(value)
+
+
+class ContainerColumn(db.TypeDecorator):
+    impl = db.String
+
+    def __init__(self, container_type, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.container_type = container_type
+
+    def process_bind_param(self, value, dialect):
+        if isinstance(value, self.container_type):
+            return json.dumps(value)
+        elif (
+            isinstance(value, str)
+            and isinstance(json.loads(value), self.container_type)
+        ):
+            return value
+        else:
+            return TypeError()
+
+    def process_result_value(self, value, dialect):
+        return json.loads(value)
+# endregion type_decorators
+
+
+##############################################################################
+# Models                                                                     #
+##############################################################################
+# region models
 class Role(HashidMixin, db.Model):
     __tablename__ = 'roles'
     # Primary key
@@ -123,12 +212,6 @@ class Role(HashidMixin, db.Model):
         db.session.commit()
 
 
-class UserSettingJobStatusMailNotificationLevel(IntEnum):
-    NONE = 1
-    END = 2
-    ALL = 3
-
-
 class User(HashidMixin, UserMixin, db.Model):
     __tablename__ = 'users'
     # Primary key
@@ -449,7 +532,6 @@ class TranskribusHTRModel(HashidMixin, db.Model):
             'user_id': self.user.hashid,
             'shared': self.shared,
             'transkribus_model_id': self.transkribus_model_id,
-            'transkribus_name': self.transkribus_name
         }
         if backrefs:
             dict_tesseract_ocr_model['user'] = \
@@ -466,20 +548,14 @@ class TranskribusHTRModel(HashidMixin, db.Model):
         #     and 'creator' in m and m['creator'] == 'Transkribus Team'
         #     and 'docType' in m and m['docType'] == 'handwritten'
         # ]
-        models = [
-            m for m in TRANSKRIBUS_HTR_MODELS
-            if m['modelId'] in [35909, 33744, 33597, 29820, 37789, 13685, 37855, 26124, 37738, 30919, 34763]
-        ]
-        for m in models:
+        for m in TRANSKRIBUS_HTR_MODELS:
             model = TranskribusHTRModel.query.filter_by(transkribus_model_id=m['modelId']).first()  # noqa
             if model is not None:
                 model.shared = True
                 model.transkribus_model_id = m['modelId']
-                model.transkribus_name = m['name']
                 continue
             model = TranskribusHTRModel(
                 shared=True,
-                transkribus_name=m['name'],
                 transkribus_model_id=m['modelId'],
                 user=user,
             )
@@ -605,17 +681,6 @@ class JobResult(FileMixin, HashidMixin, db.Model):
         return self.job.user_id
 
 
-class JobStatus(IntEnum):
-    INITIALIZING = 1
-    SUBMITTED = 2
-    QUEUED = 3
-    RUNNING = 4
-    CANCELING = 5
-    CANCELED = 6
-    COMPLETED = 7
-    FAILED = 8
-
-
 class Job(HashidMixin, db.Model):
     '''
     Class to define Jobs.
@@ -828,18 +893,6 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
         return dict_corpus_file
 
 
-class CorpusStatus(IntEnum):
-    UNPREPARED = 1
-    SUBMITTED = 2
-    QUEUED = 3
-    BUILDING = 4
-    BUILT = 5
-    FAILED = 6
-    STARTING_ANALYSIS_SESSION = 7
-    RUNNING_ANALYSIS_SESSION = 8
-    CANCELING_ANALYSIS_SESSION = 9
-
-
 class Corpus(HashidMixin, db.Model):
     '''
     Class to define a corpus.
@@ -964,8 +1017,95 @@ class Corpus(HashidMixin, db.Model):
             for x in self.files
         }
         return dict_corpus
+# endregion models
+
+
+##############################################################################
+# event_handlers                                                             #
+##############################################################################
+# region event_handlers
+@db.event.listens_for(Corpus, 'after_delete')
+@db.event.listens_for(CorpusFile, 'after_delete')
+@db.event.listens_for(Job, 'after_delete')
+@db.event.listens_for(JobInput, 'after_delete')
+@db.event.listens_for(JobResult, 'after_delete')
+def ressource_after_delete(mapper, connection, ressource):
+    jsonpatch = [{'op': 'remove', 'path': ressource.jsonpatch_path}]
+    room = f'users.{ressource.user_hashid}'
+    socketio.emit('users.patch', jsonpatch, room=room)
+
+
+@db.event.listens_for(Corpus, 'after_insert')
+@db.event.listens_for(CorpusFile, 'after_insert')
+@db.event.listens_for(Job, 'after_insert')
+@db.event.listens_for(JobInput, 'after_insert')
+@db.event.listens_for(JobResult, 'after_insert')
+def ressource_after_insert_handler(mapper, connection, ressource):
+    value = ressource.to_dict(backrefs=False, relationships=False)
+    for attr in mapper.relationships:
+        value[attr.key] = {}
+    jsonpatch = [
+        {'op': 'add', 'path': ressource.jsonpatch_path, 'value': value}
+    ]
+    room = f'users.{ressource.user_hashid}'
+    socketio.emit('users.patch', jsonpatch, room=room)
+
+
+@db.event.listens_for(Corpus, 'after_update')
+@db.event.listens_for(CorpusFile, 'after_update')
+@db.event.listens_for(Job, 'after_update')
+@db.event.listens_for(JobInput, 'after_update')
+@db.event.listens_for(JobResult, 'after_update')
+def ressource_after_update_handler(mapper, connection, ressource):
+    jsonpatch = []
+    for attr in db.inspect(ressource).attrs:
+        if attr.key in mapper.relationships:
+            continue
+        if not attr.load_history().has_changes():
+            continue
+        if isinstance(attr.value, datetime):
+            value = attr.value.isoformat() + 'Z'
+        elif isinstance(attr.value, Enum):
+            value = attr.value.name
+        else:
+            value = attr.value
+        jsonpatch.append(
+            {
+                'op': 'replace',
+                'path': f'{ressource.jsonpatch_path}/{attr.key}',
+                'value': value
+            }
+        )
+    if jsonpatch:
+        room = f'users.{ressource.user_hashid}'
+        socketio.emit('users.patch', jsonpatch, room=room)
+
+
+@db.event.listens_for(Job, 'after_update')
+def job_after_update_handler(mapper, connection, job):
+    for attr in db.inspect(job).attrs:
+        if attr.key != 'status':
+            continue
+        if job.user.setting_job_status_mail_notification_level == UserSettingJobStatusMailNotificationLevel.NONE:
+            return
+        if job.user.setting_job_status_mail_notification_level == UserSettingJobStatusMailNotificationLevel.END:
+            if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
+                return
+        msg = create_message(
+            job.user.email,
+            f'Status update for your Job "{job.title}"',
+            'tasks/email/notification',
+            job=job
+        )
+        mail.send(msg)
+# endregion event_handlers
+
+
+##############################################################################
+# misc                                                                       #
+##############################################################################
+# region misc
 @login.user_loader
 def load_user(user_id):
     return User.query.get(int(user_id))
+# endregion misc

View File

@@ -144,7 +144,6 @@ def tesseract_ocr_pipeline():
         x for x in TesseractOCRModel.query.filter().all()
         if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
     ]
-    current_app.logger.warning(tesseract_ocr_models)
     return render_template(
         'services/tesseract_ocr_pipeline.html.j2',
         form=form,

View File

@@ -1,87 +0,0 @@
-from app import db, mail, socketio
-from app.email import create_message
-from app.models import (
-    Corpus,
-    CorpusFile,
-    Job,
-    JobInput,
-    JobResult,
-    JobStatus,
-    UserSettingJobStatusMailNotificationLevel
-)
-from datetime import datetime
-from enum import Enum
-
-
-@db.event.listens_for(Corpus, 'after_delete')
-@db.event.listens_for(CorpusFile, 'after_delete')
-@db.event.listens_for(Job, 'after_delete')
-@db.event.listens_for(JobInput, 'after_delete')
-@db.event.listens_for(JobResult, 'after_delete')
-def ressource_after_delete(mapper, connection, ressource):
-    jsonpatch = [{'op': 'remove', 'path': ressource.jsonpatch_path}]
-    room = f'users.{ressource.user_hashid}'
-    socketio.emit('users.patch', jsonpatch, room=room)
-
-
-@db.event.listens_for(Corpus, 'after_insert')
-@db.event.listens_for(CorpusFile, 'after_insert')
-@db.event.listens_for(Job, 'after_insert')
-@db.event.listens_for(JobInput, 'after_insert')
-@db.event.listens_for(JobResult, 'after_insert')
-def ressource_after_insert_handler(mapper, connection, ressource):
-    value = ressource.to_dict(backrefs=False, relationships=False)
-    for attr in mapper.relationships:
-        value[attr.key] = {}
-    jsonpatch = [
-        {'op': 'add', 'path': ressource.jsonpatch_path, 'value': value}
-    ]
-    room = f'users.{ressource.user_hashid}'
-    socketio.emit('users.patch', jsonpatch, room=room)
-
-
-@db.event.listens_for(Corpus, 'after_update')
-@db.event.listens_for(CorpusFile, 'after_update')
-@db.event.listens_for(Job, 'after_update')
-@db.event.listens_for(JobInput, 'after_update')
-@db.event.listens_for(JobResult, 'after_update')
-def ressource_after_update_handler(mapper, connection, ressource):
-    jsonpatch = []
-    for attr in db.inspect(ressource).attrs:
-        if attr.key in mapper.relationships:
-            continue
-        if not attr.load_history().has_changes():
-            continue
-        if isinstance(attr.value, datetime):
-            value = attr.value.isoformat() + 'Z'
-        elif isinstance(attr.value, Enum):
-            value = attr.value.name
-        else:
-            value = attr.value
-        jsonpatch.append(
-            {
-                'op': 'replace',
-                'path': f'{ressource.jsonpatch_path}/{attr.key}',
-                'value': value
-            }
-        )
-        if isinstance(ressource, Job) and attr.key == 'status':
-            _job_status_email_handler(ressource)
-    if jsonpatch:
-        room = f'users.{ressource.user_hashid}'
-        socketio.emit('users.patch', jsonpatch, room=room)
-
-
-def _job_status_email_handler(job):
-    if job.user.setting_job_status_mail_notification_level == UserSettingJobStatusMailNotificationLevel.NONE:  # noqa
-        return
-    if job.user.setting_job_status_mail_notification_level == UserSettingJobStatusMailNotificationLevel.END:  # noqa
-        if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
-            return
-    msg = create_message(
-        job.user.email,
-        f'Status update for your Job "{job.title}"',
-        'tasks/email/notification',
-        job=job
-    )
-    mail.send(msg)

View File

@@ -1,43 +0,0 @@
-from app import db
-import json
-
-
-class IntEnumColumn(db.TypeDecorator):
-    impl = db.Integer
-
-    def __init__(self, enum_type, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.enum_type = enum_type
-
-    def process_bind_param(self, value, dialect):
-        if isinstance(value, self.enum_type) and isinstance(value.value, int):
-            return value.value
-        elif isinstance(value, int):
-            return self.enum_type(value).value
-        else:
-            return TypeError()
-
-    def process_result_value(self, value, dialect):
-        return self.enum_type(value)
-
-
-class ContainerColumn(db.TypeDecorator):
-    impl = db.String
-
-    def __init__(self, container_type, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.container_type = container_type
-
-    def process_bind_param(self, value, dialect):
-        if isinstance(value, self.container_type):
-            return json.dumps(value)
-        elif (
-            isinstance(value, str)
-            and isinstance(json.loads(value), self.container_type)
-        ):
-            return value
-        else:
-            return TypeError()
-
-    def process_result_value(self, value, dialect):
-        return json.loads(value)

View File

@@ -69,6 +69,14 @@ class JobDisplay extends RessourceDisplay {
         element.classList.remove('hide');
       }
     }
+    elements = this.displayElement.querySelectorAll('.job-log-trigger');
+    for (element of elements) {
+      if (['COMPLETED', 'FAILED'].includes(status)) {
+        element.classList.remove('hide');
+      } else {
+        element.classList.add('hide');
+      }
+    }
     elements = this.displayElement.querySelectorAll('.job-restart-trigger');
     for (element of elements) {
       if (['COMPLETED', 'FAILED'].includes(status)) {

app/tasks.py Normal file
View File

@@ -0,0 +1,9 @@
+from app.daemon import daemon
+
+
+def register(app, scheduler):
+    if app.config['NOPAQUE_IS_PRIMARY_INSTANCE']:
+        @scheduler.task('interval', id='daemon', seconds=3)
+        def daemon_task():
+            with app.app_context():
+                daemon()
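Worth noting here: `NOPAQUE_IS_PRIMARY_INSTANCE` ensures only one instance schedules the daemon, and APScheduler itself will not start a second run of a job id while one is still executing (its default `max_instances` is 1), so a slow pass is skipped rather than stacked. A hypothetical variation that makes this overlap policy explicit (not part of the commit):

@scheduler.task('interval', id='daemon', seconds=3, max_instances=1)
def daemon_task():
    with app.app_context():
        daemon()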

View File

@@ -79,6 +79,7 @@
       </div>
       <div class="card-action right-align">
         {% if current_user.is_administrator() %}
+        <a class="btn hide modal-trigger job-log-trigger waves-effect waves-light" data-target="job-log-modal"><i class="material-icons left">repeat</i>Log</a>
         <a class="btn hide modal-trigger restart-job-trigger waves-effect waves-light" data-target="restart-job-modal"><i class="material-icons left">repeat</i>Restart</a>
         {% endif %}
         <!-- <a href="#" class="btn disabled waves-effect waves-light"><i class="material-icons left">settings</i>Export Parameters</a> -->
@@ -143,6 +144,16 @@
 {% block modals %}
 {{ super() }}
+<div id="job-log-modal" class="modal">
+  <div class="modal-content">
+    <h4>Job logs</h4>
+    <iframe src="{{ url_for('jobs.job_log', job_id=job.id) }}" style="border: 0; width: 100%; height: 450px;"></iframe>
+  </div>
+  <div class="modal-footer">
+    <a href="#!" class="btn modal-close waves-effect waves-light">Close</a>
+  </div>
+</div>
 <div id="delete-job-modal" class="modal">
   <div class="modal-content">
     <h4>Confirm deletion</h4>

boot.sh
View File

@@ -1,28 +1,37 @@
 #!/bin/bash
 source venv/bin/activate
 
+display_help() {
+  local script_name=$(basename "${0}")
+  echo ""
+  echo "Usage: ${script_name} [COMMAND]"
+  echo ""
+  echo "Run wrapper for a nopaque instance"
+  echo ""
+  echo "Commands:"
+  echo "  flask  A general utility script for Flask applications."
+  echo ""
+  echo "Run '${script_name} COMMAND --help' for more information on a command."
+}
+
 if [[ "${#}" -eq 0 ]]; then
   if [[ "${NOPAQUE_IS_PRIMARY_INSTANCE:-True}" == "True" ]]; then
     while true; do
-      echo "INFO Run deployment tasks..."
       flask deploy
       if [[ "${?}" == "0" ]]; then
         break
       fi
-      echo "WARNING ...Failed, retrying in 5 secs..."
+      echo "Deploy command failed, retrying in 5 secs..."
       sleep 5
     done
-    echo "INFO Start nopaque daemon..."
-    flask daemon run &
  fi
-  echo "INFO Start nopaque..."
   python nopaque.py
 elif [[ "${1}" == "flask" ]]; then
-  exec ${@:1}
+  flask "${@:2}"
+elif [[ "${1}" == "--help" || "${1}" == "-h" ]]; then
+  display_help
 else
-  echo "${0} [COMMAND]"
-  echo ""
-  echo "nopaque startup script"
-  echo ""
-  echo "Commands:"
-  echo "  flask"
+  display_help
+  exit 1
 fi

View File

@@ -3,7 +3,7 @@
 import eventlet
 eventlet.monkey_patch()
 
-from app import db, cli, create_app, socketio  # noqa
+from app import cli, create_app, db, scheduler, socketio  # noqa
 from app.models import (
     Corpus,
     CorpusFile,
@@ -49,8 +49,14 @@ def make_shell_context() -> Dict[str, Any]:
 
 
 def main():
+    with app.app_context():
+        if app.config['NOPAQUE_IS_PRIMARY_INSTANCE']:
+            for corpus in Corpus.query.filter(Corpus.num_analysis_sessions > 0).all():
+                corpus.num_analysis_sessions = 0
+            db.session.commit()
+    scheduler.start()
     socketio.run(app, host='0.0.0.0')
 
 
 if __name__ == '__main__':
     main()

View File

@@ -93,4 +93,4 @@ wtforms[email]
 # Werkzeug==1.0.1
 # wrapt==1.14.1
 # WTForms==3.0.1
-# zipp==3.8.0
+# zipp==3.8.0