nopaque/daemon/tasks/check_jobs.py

147 lines
5.8 KiB
Python
Raw Normal View History

2020-06-05 14:42:04 +02:00
from datetime import datetime
from . import config, docker_client, Session
from .decorators import background
from .models import Job, JobResult, NotificationData, NotificationEmailData
2020-06-05 14:42:04 +02:00
import docker
import logging
2020-06-05 14:42:04 +02:00
import json
import os
2020-06-10 14:18:02 +02:00
@background
def check_jobs():
    """Run one pass over all jobs and advance each one according to its status.

    Statuses are handled in a fixed order: submitted, queued, running,
    complete, failed, canceling. The per-status selections are evaluated
    lazily against the same job list, so a job whose status is changed by an
    earlier step is picked up again by the later step that matches its new
    status within the same pass.

    All changes are committed once at the end of the pass.
    ``Session.remove()`` is called even when a step raises, so a failed pass
    does not leave its session (and its uncommitted state) behind.
    """
    session = Session()
    try:
        jobs = session.query(Job).all()

        def with_status(status):
            # Lazy on purpose: the status is read when the job is reached,
            # not when the pass starts.
            return (job for job in jobs if job.status == status)

        for job in with_status('submitted'):
            __create_job_service(job)
        for job in with_status('queued'):
            __checkout_job_service(job, session)
            # The notification is recorded for 'queued' even if the checkout
            # above has just moved the job on to another status.
            __add_notification_data(job, 'queued', session)
        for job in with_status('running'):
            __checkout_job_service(job, session)
            __add_notification_data(job, 'running', session)
        for job in with_status('complete'):
            __add_notification_data(job, 'complete', session)
        for job in with_status('failed'):
            __add_notification_data(job, 'failed', session)
        for job in with_status('canceling'):
            __remove_job_service(job)
        session.commit()
    finally:
        # Always release the session, also when a step above raised.
        Session.remove()
def __add_notification_data(job, notified_on_status, session):
    """Record that a status mail for *job* is due, unless it is not wanted.

    Nothing happens when the job's user has set mail notifications to
    ``'none'``, or to ``'end'`` while *notified_on_status* is not
    ``'complete'``. Otherwise the job gets a ``NotificationData`` row if it
    has none yet, and a ``NotificationEmailData`` row is added whenever the
    status last recorded on that ``NotificationData`` differs from
    *notified_on_status*.

    :param job: the job whose status is reported
    :param notified_on_status: the status to notify about
    :param session: database session used to add (and, for a new
        ``NotificationData``, commit) the rows
    """
    preference = job.user.setting_job_status_mail_notifications
    # The user does not want any notifications at all.
    if preference == 'none':
        return
    # The user only wants to be notified about completed jobs.
    if preference == 'end' and notified_on_status != 'complete':
        return

    # Create the NotificationData for this job if there is none yet.
    if len(job.notification_data) == 0:
        session.add(NotificationData(job_id=job.id))
        # Without this commit the job would still have no NotificationData
        # when it is looked up right below.
        session.commit()

    tracker = job.notification_data[0]
    # This status has already been recorded; do not queue it again.
    if tracker.notified_on == notified_on_status:
        return

    mail = NotificationEmailData(job_id=job.id)
    mail.notify_status = notified_on_status
    mail.creation_date = datetime.utcnow()
    tracker.notified_on = notified_on_status
    session.add(mail)
2020-06-05 14:42:04 +02:00
def __create_job_service(job):
    """Create the Docker service that runs *job* and update the job status.

    The service is named ``job_<id>`` and gets the job directory
    (``<DATA_DIR>/<user_id>/jobs/<job_id>``) mounted read-write at
    ``/files``. A service of the same name that already exists is removed
    first.

    Outcome:

    * service created: ``job.status`` becomes ``'queued'``
    * creation failed with a Docker error: ``job.status`` becomes
      ``'failed'``
    * looking up or removing a stale service failed with a Docker error:
      the job is left untouched, so it is tried again on the next pass

    Every Docker error handled here is logged.

    :param job: the job to start; reads ``id``, ``user_id``, ``service``,
        ``service_version``, ``service_args`` (a JSON encoded list of
        strings), ``secure_filename``, ``n_cores`` and ``mem_mb``
    """
    job_dir = os.path.join(config.DATA_DIR,
                           str(job.user_id),
                           'jobs',
                           str(job.id))
    cmd_parts = ['{} -i /files -o /files/output'.format(job.service)]
    if job.service == 'file-setup':
        cmd_parts.append('-f {}'.format(job.secure_filename))
    cmd_parts.append('--log-dir /files')
    cmd_parts.append('--zip [{}]_{}'.format(job.service, job.secure_filename))
    cmd_parts.append(' '.join(json.loads(job.service_args)))
    cmd = ' '.join(cmd_parts)
    service_name = 'job_{}'.format(job.id)
    service_args = {
        'command': cmd,
        'constraints': ['node.role==worker'],
        'labels': {'origin': 'nopaque',
                   'type': 'service.{}'.format(job.service),
                   'job_id': str(job.id)},
        'mounts': [job_dir + ':/files:rw'],
        'name': service_name,
        # NOTE(review): per the Docker SDK docs the CPU reservation is given
        # in units of 10^-9 CPUs and the memory reservation in bytes.
        'resources': docker.types.Resources(
            cpu_reservation=job.n_cores * (10 ** 9),
            mem_reservation=job.mem_mb * (10 ** 6)),
        'restart_policy': docker.types.RestartPolicy(),
    }
    service_image = ('gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/'
                     + job.service + ':' + job.service_version)

    # Remove a stale service of the same name before creating a new one.
    try:
        service = docker_client.services.get(service_name)
    except docker.errors.NotFound:
        pass  # Nothing to clean up.
    except docker.errors.DockerException as e:
        logging.error('__create_job_service(%s): Could not look up the '
                      'service, retrying on the next pass. (%s)', job.id, e)
        return
    else:
        try:
            service.remove()
        except docker.errors.NotFound:
            pass  # The service vanished in the meantime; that is fine.
        except docker.errors.DockerException as e:
            logging.error('__create_job_service(%s): Could not remove the '
                          'existing service, retrying on the next pass. (%s)',
                          job.id, e)
            return

    try:
        docker_client.services.create(service_image, **service_args)
    except docker.errors.DockerException as e:
        logging.error('__create_job_service(%s): Could not create the '
                      'service. (status: %s -> failed) (%s)',
                      job.id, job.status, e)
        job.status = 'failed'
    else:
        job.status = 'queued'
def __checkout_job_service(job, session):
    """Synchronise the status of *job* with the state of its Docker service.

    The service ``job_<id>`` is looked up. If it does not exist, an error is
    logged and the job is marked ``'failed'``. Any other Docker error during
    the lookup, or a service without tasks, leaves the job untouched.

    Otherwise the state of the first task decides:

    * a ``'queued'`` job becomes ``'running'`` once the state is no longer
      ``'pending'``
    * a ``'running'`` job whose state is ``'complete'`` or ``'failed'`` gets
      its service removed, its ``end_date`` set and its status set to that
      state; for ``'complete'`` a ``JobResult`` is added for every ``.zip``
      file in the job's ``output`` directory

    :param job: the job to check
    :param session: database session the ``JobResult`` rows are added to
    """
    try:
        service = docker_client.services.get('job_{}'.format(job.id))
    except docker.errors.NotFound:
        # Built before the status changes, so it reports the old status.
        message = ('__checkout_job_service({}): '.format(job.id)
                   + 'The service does not exist. '
                   + '(status: {} -> failed)'.format(job.status))
        logging.error(message)
        job.status = 'failed'
        return
    except docker.errors.DockerException:
        return

    tasks = service.tasks()
    if not tasks:
        return
    state = tasks[0].get('Status').get('State')

    if job.status == 'queued':
        if state != 'pending':
            job.status = 'running'
        return
    if job.status != 'running' or state not in ('complete', 'failed'):
        return

    # The task has ended: clean up the service and record the outcome.
    service.remove()
    job.end_date = datetime.utcnow()
    job.status = state
    if state != 'complete':
        return

    output_dir = os.path.join(config.DATA_DIR,
                              str(job.user_id),
                              'jobs',
                              str(job.id),
                              'output')
    for entry in os.listdir(output_dir):
        if entry.endswith('.zip'):
            session.add(JobResult(dir=output_dir,
                                  filename=entry,
                                  job_id=job.id))
2020-06-05 14:42:04 +02:00
def __remove_job_service(job):
    """Remove the Docker service of a job that is being canceled.

    If the service ``job_<id>`` no longer exists, the job is marked
    ``'canceled'``. If it still exists, it is updated with ``mounts=None``
    and then removed; the job status is not changed in that case. Any other
    Docker error during the lookup leaves the job untouched.

    :param job: the job whose service is to be removed
    """
    try:
        service = docker_client.services.get('job_{}'.format(job.id))
    except docker.errors.NotFound:
        # The service is gone, so the cancellation is finished.
        job.status = 'canceled'
        return
    except docker.errors.DockerException:
        return
    service.update(mounts=None)
    service.remove()