2020-06-05 12:42:04 +00:00
|
|
|
from datetime import datetime
|
|
|
|
from logger.logger import init_logger
|
|
|
|
from tasks import Session, docker_client, NOPAQUE_STORAGE
|
2020-06-10 12:18:02 +00:00
|
|
|
from tasks.decorators import background
|
2020-06-05 12:42:04 +00:00
|
|
|
from tasks.Models import Job, NotificationData, NotificationEmailData, JobResult
|
|
|
|
import docker
|
|
|
|
import json
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
2020-06-10 12:18:02 +00:00
|
|
|
@background
def check_jobs():
    """Run one scheduling pass over every job stored in the database.

    The jobs are walked once per status, in this order:

    * 'submitted': a Docker service is created for the job.
    * 'queued' / 'running': the job is synchronised with its Docker service
      and notification data is recorded.
    * 'complete' / 'failed': notification data is recorded.
    * 'canceling': the job's Docker service is removed.

    ``job.status`` is read at the moment a job is visited, so a job that an
    earlier pass moved forward (e.g. 'submitted' -> 'queued') is handled
    again by the later pass within the same run.

    All changes are committed once at the end. ``Session.remove()`` is
    called even when a helper raises, so a failing run does not leave the
    session behind for the next one.
    """
    cj_session = Session()
    try:
        jobs = cj_session.query(Job).all()
        for job in jobs:
            if job.status == 'submitted':
                __create_job_service(job)
        for job in jobs:
            if job.status == 'queued':
                __checkout_job_service(job, cj_session)
                # The notification is recorded for the status this pass
                # handles, even if the checkout just changed job.status.
                __add_notification_data(job, 'queued', cj_session)
        for job in jobs:
            if job.status == 'running':
                __checkout_job_service(job, cj_session)
                __add_notification_data(job, 'running', cj_session)
        for job in jobs:
            if job.status == 'complete':
                __add_notification_data(job, 'complete', cj_session)
        for job in jobs:
            if job.status == 'failed':
                __add_notification_data(job, 'failed', cj_session)
        for job in jobs:
            if job.status == 'canceling':
                __remove_job_service(job)
        cj_session.commit()
    finally:
        # Runs on success and on failure alike; uncommitted changes of a
        # failed run are not committed.
        Session.remove()
|
|
|
|
|
|
|
|
|
|
|
|
def __add_notification_data(job, notified_on_status, scoped_session):
    """Record that *job* should be notified about *notified_on_status*.

    Nothing happens when the job's user has notifications set to 'none', or
    set to 'end' while the status is anything other than 'complete'.

    Otherwise the job gets a NotificationData row if it has none yet, and a
    NotificationEmailData row is added to the session unless the job was
    already notified for this status.

    :param job: job whose ``user`` and ``notification_data`` are inspected.
    :param notified_on_status: status string the notification is about.
    :param scoped_session: session the new rows are added to.
    """
    logger = init_logger()
    preference = job.user.setting_job_status_mail_notifications
    # The user does not want any notifications at all.
    if preference == 'none':
        return
    # The user only wants a notification once the job is complete.
    if preference == 'end' and notified_on_status != 'complete':
        return
    # Give the job a NotificationData row if it does not have one yet.
    if not job.notification_data:
        scoped_session.add(NotificationData(job_id=job.id))
        # Without this commit the job would still have no NotificationData
        # when it is read below.
        scoped_session.commit()
    tracker = job.notification_data[0]
    if tracker.notified_on == notified_on_status:
        # NotificationEmailData was already created for this status.
        return
    email_data = NotificationEmailData(job_id=job.id)
    email_data.notify_status = notified_on_status
    email_data.creation_date = datetime.utcnow()
    tracker.notified_on = notified_on_status
    scoped_session.add(email_data)
    logger.warning('Created NotificationEmailData for current Job.')
|
|
|
|
|
|
|
|
|
|
|
|
def __create_job_service(job):
    """Create the Docker service that runs *job* and mark the job 'queued'.

    The service command is built from the job's service name, its secure
    filename and the JSON-encoded list in ``job.service_args``. The job's
    directory below NOPAQUE_STORAGE is mounted at ``/files``.

    A service that already exists under the job's name is removed first.
    If Docker cannot be queried, or the stale service cannot be removed,
    the function returns without touching ``job.status`` so the job is
    picked up again on the next run. If creating the service fails, the job
    is marked 'failed'.

    :param job: job providing ``id``, ``user_id``, ``service``,
        ``service_version``, ``service_args``, ``secure_filename``,
        ``n_cores`` and ``mem_mb``.
    """
    job_dir = os.path.join(NOPAQUE_STORAGE, str(job.user_id), 'jobs',
                           str(job.id))
    cmd = '{} -i /files -o /files/output'.format(job.service)
    if job.service == 'file-setup':
        cmd += ' -f {}'.format(job.secure_filename)
    cmd += ' --log-dir /files'
    cmd += ' --zip [{}]_{}'.format(job.service, job.secure_filename)
    cmd += ' ' + ' '.join(json.loads(job.service_args))
    service_args = {'command': cmd,
                    'constraints': ['node.role==worker'],
                    'labels': {'origin': 'nopaque',
                               'type': 'service.{}'.format(job.service),
                               'job_id': str(job.id)},
                    'mounts': [job_dir + ':/files:rw'],
                    'name': 'job_{}'.format(job.id),
                    'resources': docker.types.Resources(
                        cpu_reservation=job.n_cores * (10 ** 9),
                        mem_reservation=job.mem_mb * (10 ** 6)),
                    'restart_policy': docker.types.RestartPolicy()}
    service_image = ('gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/'
                     + job.service + ':' + job.service_version)
    try:
        service = docker_client.services.get(service_args['name'])
    except docker.errors.NotFound:
        # No leftover service under this name; nothing to clean up.
        pass
    except docker.errors.DockerException:
        # Docker could not be queried; leave the job untouched for a retry.
        return
    else:
        try:
            service.remove()
        except docker.errors.NotFound:
            # The service disappeared between get() and remove().
            pass
        except docker.errors.DockerException:
            # The stale service is still there; retry on the next run
            # instead of aborting the caller.
            return
    try:
        docker_client.services.create(service_image, **service_args)
    except docker.errors.DockerException:
        job.status = 'failed'
    else:
        job.status = 'queued'
|
|
|
|
|
|
|
|
|
|
|
|
def __checkout_job_service(job, scoped_session):
    """Synchronise ``job.status`` with the state of its Docker service.

    * The service does not exist: the job is marked 'failed' and an error
      is logged.
    * Docker cannot be queried, or the service has no tasks yet: nothing
      changes.
    * The job is 'queued' and the first task is no longer 'pending': the
      job becomes 'running'.
    * The job is 'running' and the first task is 'complete' or 'failed':
      the service is removed, ``job.end_date`` is set and the job takes
      over the task state. For a 'complete' task every ``.zip`` file in
      the job's output directory is added to the session as a JobResult.

    :param job: job to synchronise.
    :param scoped_session: session the JobResult rows are added to.
    """
    logger = init_logger()
    service_name = 'job_{}'.format(job.id)
    try:
        service = docker_client.services.get(service_name)
    except docker.errors.NotFound:
        logger.error('__checkout_job_service({}):'.format(job.id)
                     + ' The service does not exist.'
                     + ' (status: {} -> failed)'.format(job.status))
        job.status = 'failed'
        return
    except docker.errors.DockerException:
        return
    service_tasks = service.tasks()
    if not service_tasks:
        return
    task_state = service_tasks[0].get('Status').get('State')
    if job.status == 'queued' and task_state != 'pending':
        job.status = 'running'
    elif job.status == 'running' and task_state in ('complete', 'failed'):
        try:
            service.remove()
        except docker.errors.NotFound:
            # Already gone; the final state can still be recorded.
            pass
        except docker.errors.DockerException:
            # Keep the job 'running' so the next run retries the removal
            # instead of aborting the caller.
            return
        job.end_date = datetime.utcnow()
        job.status = task_state
        if task_state == 'complete':
            results_dir = os.path.join(NOPAQUE_STORAGE, str(job.user_id),
                                       'jobs', str(job.id), 'output')
            for filename in os.listdir(results_dir):
                if not filename.endswith('.zip'):
                    continue
                job_result = JobResult(dir=results_dir, filename=filename,
                                       job_id=job.id)
                scoped_session.add(job_result)
|
|
|
|
|
|
|
|
|
|
|
|
def __remove_job_service(job):
    """Remove the Docker service of a job that is being canceled.

    When the service no longer exists the job is marked 'canceled'.
    Otherwise the service is updated with ``mounts=None`` and then removed;
    ``job.status`` is left unchanged in that case, so the job is only
    marked 'canceled' by a later call that finds the service gone.

    Any other Docker error leaves the job untouched so the removal is
    retried on the next run.

    :param job: job whose service ``job_<id>`` is removed.
    """
    service_name = 'job_{}'.format(job.id)
    try:
        service = docker_client.services.get(service_name)
    except docker.errors.NotFound:
        job.status = 'canceled'
        return
    except docker.errors.DockerException:
        return
    try:
        # NOTE(review): presumably the mounts are dropped so the job
        # directory is released before removal -- confirm.
        service.update(mounts=None)
        service.remove()
    except docker.errors.DockerException:
        # Includes NotFound: either way the next run re-checks the service
        # rather than this error aborting the caller.
        return
|