2019-08-13 12:10:50 +00:00
|
|
|
from datetime import datetime
|
2019-08-20 13:57:58 +00:00
|
|
|
from . import db, scheduler
|
2019-08-13 12:10:50 +00:00
|
|
|
from .models import Job
|
2019-07-17 11:34:20 +00:00
|
|
|
import docker
|
2019-08-06 12:27:41 +00:00
|
|
|
import json
|
2019-08-06 12:54:00 +00:00
|
|
|
import os
|
2019-07-17 11:34:20 +00:00
|
|
|
|
|
|
|
|
2019-08-13 12:27:02 +00:00
|
|
|
def checkout_jobs():
    """Start Docker services for submitted jobs and sync running jobs.

    Runs inside the scheduler's application context and performs two
    passes over the ``Job`` table:

    1. Every job with status ``'submitted'`` gets a Docker service created
       for it (named after the job id) and is marked ``'scheduled'``.
    2. Every job that is not ``'complete'``, ``'failed'`` or ``'submitted'``
       has its status replaced by the state of the first task reported for
       its service. When that state is ``'complete'`` or ``'failed'`` the
       job's ``end_date`` is set and the service is removed. A job whose
       service cannot be found is marked ``'failed'``.

    All changes are committed once at the end.
    """
    with scheduler.app.app_context():
        client = docker.from_env()
        jobs = db.session.query(Job)

        for job in jobs.filter_by(status='submitted').all():
            _command = (job.service
                        + ' -i /files'
                        + ' -o /files/output'
                        + ' ' + ' '.join(json.loads(job.service_args)))
            _constraints = ['node.role==worker']
            _image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/{}:{}'.format(
                job.service,
                job.service_version
            )
            _labels = {'service': job.service}
            _mounts = [os.path.join('/home/compute/mnt/opaque',
                                    str(job.user_id),
                                    'jobs',
                                    str(job.id))
                       + ':/files:rw']
            _name = str(job.id)
            # The Docker SDK for Python expects the cpu_reservation value to
            # be scaled to nanos (10^9). Because the job object contains
            # unscaled (10^0) values, it must be converted.
            #
            # While the cpu_reservation value has to be in nanos, the
            # mem_reservation value must be presented in an unscaled form.
            # Because the job object provides the memory value in megabytes,
            # it is also necessary to convert that value.
            _resources = docker.types.Resources(
                cpu_reservation=job.n_cores * (10 ** 9),
                mem_reservation=job.mem_mb * (10 ** 6)
            )
            _restart_policy = docker.types.RestartPolicy(condition='none')
            # Create the service with the prepared values.
            #
            # Note: A service reserves hardware resources. In case no worker
            #       node has the required resources available (not
            #       reserved), the service gets queued by the Docker engine
            #       until a node is able to meet the requirements.
            client.services.create(
                _image,
                command=_command,
                constraints=_constraints,
                labels=_labels,
                mounts=_mounts,
                name=_name,
                resources=_resources,
                restart_policy=_restart_policy
            )
            job.status = 'scheduled'

        for job in jobs.filter(Job.status != 'complete',
                               Job.status != 'failed',
                               Job.status != 'submitted').all():
            try:
                service = client.services.get(str(job.id))
                tasks = service.tasks()
                if not tasks:
                    # A service may exist before any task has been created
                    # for it (e.g. it was only just created above). Indexing
                    # the empty list would raise IndexError and abort the
                    # whole run before the commit, so keep the current
                    # status and check again on the next run.
                    continue
                job.status = tasks[0].get('Status').get('State')
                if job.status in ('complete', 'failed'):
                    job.end_date = datetime.utcnow()
                    service.remove()
            except docker.errors.NotFound:
                job.status = 'failed'

        db.session.commit()
|