nopaque/daemon/tasks/check_jobs.py
2020-06-05 15:25:16 +02:00

150 lines
6.4 KiB
Python

from datetime import datetime
from decorators import background
from logger.logger import init_logger
from tasks import Session, docker_client, NOPAQUE_STORAGE
from tasks.Models import Job, NotificationData, NotificationEmailData, JobResult
import docker
import json
import os
@background
def check_jobs():
    """Advance every job by one life-cycle step and queue status notifications.

    All jobs are loaded once and then walked in a fixed order of statuses:
    'submitted', 'queued', 'running', 'complete', 'failed', 'canceling'.
    Status filters are evaluated lazily, at the time each pass runs, so a job
    whose status is changed by an earlier pass (e.g. 'submitted' -> 'queued'
    in __create_job_service) is also picked up by the later pass for its new
    status within the same invocation.

    Changes are committed once at the end. The scoped session is always
    released, even if one of the passes raises, so a failing run does not
    leave a stale session (and its open transaction) registered.
    """
    cj_session = Session()
    try:
        jobs = cj_session.query(Job).all()

        def jobs_with_status(status):
            # Lazy on purpose: the status is checked when the job is reached,
            # not when the pass is set up.
            return (job for job in jobs if job.status == status)

        for job in jobs_with_status('submitted'):
            __create_job_service(job)
        for job in jobs_with_status('queued'):
            __checkout_job_service(job, cj_session)
            __add_notification_data(job, 'queued', cj_session)
        for job in jobs_with_status('running'):
            __checkout_job_service(job, cj_session)
            __add_notification_data(job, 'running', cj_session)
        for job in jobs_with_status('complete'):
            __add_notification_data(job, 'complete', cj_session)
        for job in jobs_with_status('failed'):
            __add_notification_data(job, 'failed', cj_session)
        for job in jobs_with_status('canceling'):
            __remove_job_service(job)
        cj_session.commit()
    finally:
        # Previously only reached on success; an exception in any helper
        # skipped the cleanup entirely.
        Session.remove()
def __add_notification_data(job, notified_on_status, scoped_session):
    """Queue a NotificationEmailData row for *job* if its owner wants one.

    Nothing is queued when the user's mail notification setting is 'none',
    or when it is 'end' and *notified_on_status* is not 'complete'.

    Otherwise the job's NotificationData row (created and committed here if
    the job has none yet) records the status that was last notified. A new
    NotificationEmailData row is added to *scoped_session* only when that
    recorded status differs from *notified_on_status*.
    """
    logger = init_logger()
    preference = job.user.setting_job_status_mail_notifications
    # User does not want any notifications at all.
    if preference == 'none':
        return
    # User only wants to hear about completed jobs.
    if preference == 'end' and notified_on_status != 'complete':
        return
    if len(job.notification_data) == 0:
        scoped_session.add(NotificationData(job_id=job.id))
        # Per the original author's note: without this commit the job would
        # have no NotificationData when the relationship is read below.
        scoped_session.commit()
    tracker = job.notification_data[0]
    if tracker.notified_on == notified_on_status:
        # An email for this status has already been queued.
        return
    email_data = NotificationEmailData(job_id=job.id)
    email_data.notify_status = notified_on_status
    email_data.creation_date = datetime.utcnow()
    tracker.notified_on = notified_on_status
    scoped_session.add(email_data)
    logger.warning('Created NotificationEmailData for current Job.')
def __create_job_service(job):
    """Create the Docker service that runs *job* and mark the job 'queued'.

    An existing service with the same name ('job_<id>') is removed first.
    If looking up that service fails with a Docker error other than
    NotFound, the function returns without touching the job. If creating
    the new service fails with a Docker error, the job is marked 'failed'.
    """
    job_dir = os.path.join(NOPAQUE_STORAGE, str(job.user_id), 'jobs',
                           str(job.id))
    # Only the 'file-setup' service receives the file name as a positional
    # argument; every other service gets an empty slot there.
    input_file = job.secure_filename if job.service == 'file-setup' else ''
    extra_args = ' '.join(json.loads(job.service_args))
    command = ('{0} /files /files/output {1}'
               ' --log-dir /files'
               ' --zip [{0}]_{2}'
               ' {3}').format(job.service, input_file, job.secure_filename,
                              extra_args)
    service_name = 'job_{}'.format(job.id)
    service_args = {
        'command': command,
        'constraints': ['node.role==worker'],
        'labels': {'origin': 'nopaque',
                   'type': 'service.{}'.format(job.service),
                   'job_id': str(job.id)},
        'mounts': [job_dir + ':/files:rw'],
        'name': service_name,
        'resources': docker.types.Resources(
            cpu_reservation=job.n_cores * (10 ** 9),
            mem_reservation=job.mem_mb * (10 ** 6)),
        'restart_policy': docker.types.RestartPolicy(),
    }
    service_image = ('gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/'
                     + job.service + ':' + job.service_version)

    # Clear out a leftover service of the same name, if there is one.
    try:
        stale_service = docker_client.services.get(service_name)
    except docker.errors.NotFound:
        stale_service = None
    except docker.errors.DockerException:
        return
    if stale_service is not None:
        stale_service.remove()

    try:
        docker_client.services.create(service_image, **service_args)
    except docker.errors.DockerException:
        job.status = 'failed'
        return
    job.status = 'queued'
def __checkout_job_service(job, scoped_session):
    """Synchronise *job*'s status with the state of its Docker service.

    - If the service 'job_<id>' does not exist, the job is marked 'failed'
      and an error is logged.
    - On any other Docker error during lookup, or if the service has no
      tasks yet, nothing is changed.
    - A 'queued' job becomes 'running' as soon as its first task is no
      longer 'pending'.
    - A 'running' job whose first task is 'complete' or 'failed' takes over
      that state, gets its end date set, and its service is removed. For a
      completed job every '.zip' file in the job's output directory is
      registered as a JobResult in *scoped_session*.
    """
    logger = init_logger()
    service_name = 'job_{}'.format(job.id)
    try:
        service = docker_client.services.get(service_name)
    except docker.errors.NotFound:
        # Log message typo fixed ('stauts' -> 'status').
        logger.error('__checkout_job_service({}):'.format(job.id)
                     + ' The service does not exist.'
                     + ' (status: {} -> failed)'.format(job.status))
        job.status = 'failed'
        return
    except docker.errors.DockerException:
        return
    service_tasks = service.tasks()
    if not service_tasks:
        return
    task_state = service_tasks[0].get('Status').get('State')
    if job.status == 'queued' and task_state != 'pending':
        job.status = 'running'
    elif job.status == 'running' and task_state in ('complete', 'failed'):
        # NOTE(review): other terminal swarm task states (e.g. 'shutdown',
        # 'rejected') are not handled here -- confirm a job cannot get stuck
        # in 'running' because of them.
        service.remove()
        job.end_date = datetime.utcnow()
        job.status = task_state
        if task_state == 'complete':
            results_dir = os.path.join(NOPAQUE_STORAGE, str(job.user_id),
                                       'jobs', str(job.id), 'output')
            for filename in os.listdir(results_dir):
                if not filename.endswith('.zip'):
                    continue
                job_result = JobResult(dir=results_dir, filename=filename,
                                       job_id=job.id)
                scoped_session.add(job_result)
def __remove_job_service(job):
    """Tear down the Docker service of a job that is being canceled.

    If the service 'job_<id>' no longer exists, the job is marked
    'canceled'. If the lookup fails with another Docker error, nothing is
    changed. Otherwise the service's mounts are cleared and the service is
    removed; the job's status is left untouched in that case.
    """
    service_name = 'job_{}'.format(job.id)
    try:
        service = docker_client.services.get(service_name)
    except docker.errors.NotFound:
        # Service is already gone, so the cancellation is complete.
        job.status = 'canceled'
        return
    except docker.errors.DockerException:
        return
    service.update(mounts=None)
    service.remove()