mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-01-18 14:00:33 +00:00
147 lines
5.8 KiB
Python
147 lines
5.8 KiB
Python
from datetime import datetime
|
|
from . import config, docker_client, Session
|
|
from .decorators import background
|
|
from .models import Job, JobResult, NotificationData, NotificationEmailData
|
|
import docker
|
|
import logging
|
|
import json
|
|
import os
|
|
|
|
|
|
@background
def check_jobs():
    """Advance every job one step according to its current status.

    All jobs are loaded once and then processed in fixed passes:

    * ``submitted`` -> a Docker service is created for the job
    * ``queued`` / ``running`` -> the service is inspected and notification
      data is recorded for that status
    * ``complete`` / ``failed`` -> notification data is recorded
    * ``canceling`` -> the job's Docker service is removed

    The status is re-read in every pass, so a job whose status was changed
    by an earlier pass is picked up again by a later pass of the same call.

    The session is committed once at the end. It is always handed back via
    ``Session.remove()``, even when one of the passes raises; the exception
    itself still propagates to the caller.
    """
    session = Session()
    try:
        jobs = session.query(Job).all()

        for job in jobs:
            if job.status == 'submitted':
                __create_job_service(job)

        for job in jobs:
            if job.status == 'queued':
                __checkout_job_service(job, session)
                __add_notification_data(job, 'queued', session)

        for job in jobs:
            if job.status == 'running':
                __checkout_job_service(job, session)
                __add_notification_data(job, 'running', session)

        for job in jobs:
            if job.status == 'complete':
                __add_notification_data(job, 'complete', session)

        for job in jobs:
            if job.status == 'failed':
                __add_notification_data(job, 'failed', session)

        for job in jobs:
            if job.status == 'canceling':
                __remove_job_service(job)

        session.commit()
    finally:
        # Release the session on every exit path, so a failing helper cannot
        # leave an open transaction behind for the next run.
        # NOTE(review): assumes Session is a SQLAlchemy scoped_session.
        Session.remove()
|
|
|
|
|
|
def __add_notification_data(job, notified_on_status, session):
    """Record that a status e-mail should be sent for ``job``, if wanted.

    Nothing happens when the job's user has set
    ``setting_job_status_mail_notifications`` to ``'none'``, or to ``'end'``
    while ``notified_on_status`` is anything other than ``'complete'``.

    Otherwise the job gets a ``NotificationData`` row if it has none yet.
    When that row's ``notified_on`` differs from ``notified_on_status``, a
    ``NotificationEmailData`` entry is added to ``session`` and
    ``notified_on`` is updated, so each status is recorded only once in a
    row.

    :param job: the job whose status is being reported
    :param notified_on_status: status name to notify about
    :param session: database session used for adds and the interim commit
    """
    preference = job.user.setting_job_status_mail_notifications

    # The user does not want any notifications at all.
    if preference == 'none':
        return

    # The user only wants to hear about completed jobs.
    if preference == 'end' and notified_on_status != 'complete':
        return

    # Make sure the job has a NotificationData row to track the last status.
    if len(job.notification_data) == 0:
        session.add(NotificationData(job_id=job.id))
        # Committed right away: per the original author's note the job would
        # otherwise have no NotificationData when it is read just below.
        session.commit()

    tracker = job.notification_data[0]
    if tracker.notified_on == notified_on_status:
        # This status has already been recorded for the job.
        return

    email_data = NotificationEmailData(job_id=job.id)
    email_data.notify_status = notified_on_status
    email_data.creation_date = datetime.utcnow()
    tracker.notified_on = notified_on_status
    session.add(email_data)
|
|
|
|
|
|
def __create_job_service(job):
    """Create the Docker service that executes ``job``.

    The service runs the image
    ``gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/<service>:<version>`` with
    the job directory mounted read-write at ``/files``. It is named
    ``job_<id>`` and reserves the CPU and memory requested by the job.

    An already existing service with the same name is removed first. If
    looking it up fails with a Docker error other than "not found", the
    function returns without changing the job. Afterwards ``job.status``
    becomes ``'queued'`` when the service was created and ``'failed'`` when
    the creation raised a Docker error.

    :param job: the job to start; its ``status`` attribute is updated
    """
    job_dir = os.path.join(config.DATA_DIR,
                           str(job.user_id),
                           'jobs',
                           str(job.id))

    # Assemble the container command from its fragments.
    fragments = ['{} -i /files -o /files/output'.format(job.service)]
    if job.service == 'file-setup':
        fragments.append(' -f {}'.format(job.secure_filename))
    fragments.append(' --log-dir /files')
    fragments.append(' --zip [{}]_{}'.format(job.service,
                                             job.secure_filename))
    # service_args is stored as a JSON list of extra command line arguments.
    fragments.append(' ' + ' '.join(json.loads(job.service_args)))
    cmd = ''.join(fragments)

    service_name = 'job_{}'.format(job.id)
    reservation = docker.types.Resources(
        cpu_reservation=job.n_cores * (10 ** 9),
        mem_reservation=job.mem_mb * (10 ** 6))
    create_kwargs = dict(
        command=cmd,
        constraints=['node.role==worker'],
        labels={'origin': 'nopaque',
                'type': 'service.{}'.format(job.service),
                'job_id': str(job.id)},
        mounts=[job_dir + ':/files:rw'],
        name=service_name,
        resources=reservation,
        restart_policy=docker.types.RestartPolicy())
    image = ''.join(('gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/',
                     job.service, ':', job.service_version))

    # Look for a leftover service with the same name.
    try:
        leftover = docker_client.services.get(service_name)
    except docker.errors.NotFound:
        leftover = None
    except docker.errors.DockerException:
        # Docker could not be queried; leave the job untouched.
        return
    if leftover is not None:
        leftover.remove()

    try:
        docker_client.services.create(image, **create_kwargs)
    except docker.errors.DockerException:
        job.status = 'failed'
        return
    job.status = 'queued'
|
|
|
|
|
|
def __checkout_job_service(job, session):
    """Synchronise ``job.status`` with the state of its Docker service.

    * Service ``job_<id>`` does not exist: an error is logged and the job is
      marked ``'failed'``.
    * Any other Docker error during lookup, or a service without tasks:
      nothing changes.
    * Job is ``'queued'`` and the first task is no longer ``'pending'``:
      the job becomes ``'running'``.
    * Job is ``'running'`` and the first task is ``'complete'`` or
      ``'failed'``: the service is removed, ``end_date`` is set and the job
      takes over the task state. For a completed task, one ``JobResult``
      per ``.zip`` file in the job's ``output`` directory is added to
      ``session``.

    :param job: the job to inspect; ``status`` and ``end_date`` may be updated
    :param session: database session that receives new ``JobResult`` rows
    """
    try:
        service = docker_client.services.get('job_{}'.format(job.id))
    except docker.errors.NotFound:
        message = ('__checkout_job_service({}): '.format(job.id)
                   + 'The service does not exist. '
                   + '(status: {} -> failed)'.format(job.status))
        logging.error(message)
        job.status = 'failed'
        return
    except docker.errors.DockerException:
        return

    tasks = service.tasks()
    if not tasks:
        return
    # Only the first reported task is taken into account.
    task_state = tasks[0].get('Status').get('State')

    if job.status == 'queued':
        if task_state != 'pending':
            job.status = 'running'
        return

    if job.status != 'running' or task_state not in ('complete', 'failed'):
        return

    # The task has finished one way or the other.
    service.remove()
    job.end_date = datetime.utcnow()
    job.status = task_state
    if task_state != 'complete':
        return

    output_dir = os.path.join(config.DATA_DIR,
                              str(job.user_id),
                              'jobs',
                              str(job.id),
                              'output')
    for entry in os.listdir(output_dir):
        if entry.endswith('.zip'):
            session.add(JobResult(dir=output_dir,
                                  filename=entry,
                                  job_id=job.id))
|
|
|
|
|
|
def __remove_job_service(job):
    """Remove the Docker service of a job that is being canceled.

    If service ``job_<id>`` no longer exists, the job is marked
    ``'canceled'``. If it exists, its mounts are cleared and the service is
    removed. The status is not changed in that case, so the job only
    becomes ``'canceled'`` once a later call finds the service gone. Any
    other Docker error during lookup leaves the job untouched.

    :param job: the job to cancel; ``status`` may be set to ``'canceled'``
    """
    try:
        service = docker_client.services.get('job_{}'.format(job.id))
    except docker.errors.NotFound:
        job.status = 'canceled'
        return
    except docker.errors.DockerException:
        return

    # Drop the mounts first, then remove the service itself.
    service.update(mounts=None)
    service.remove()
|