2019-07-17 13:34:20 +02:00
|
|
|
import docker
|
2019-07-19 13:28:57 +02:00
|
|
|
import time
|
2019-08-06 14:27:41 +02:00
|
|
|
import json
|
2019-08-06 14:54:00 +02:00
|
|
|
import os
|
2019-07-17 13:34:20 +02:00
|
|
|
|
|
|
|
|
|
|
|
class Swarm:
    """Run one-shot job containers as services on a Docker Swarm.

    Swarm mode is intended to run containers which serve a non-terminating
    service like a web server. For processing an occurring job it is
    necessary to use a one-shot container, which stops after the wrapped job
    process is completely executed. In order to run these one-shot
    containers in Swarm mode, the run method is implemented analogously to
    the implementation presented in Alex Ellis' blog post "One-shot
    containers on Docker Swarm":
    https://blog.alexellis.io/containers-on-swarm/
    """

    # Seconds to wait between two polls of the service's task list.
    POLL_INTERVAL = 1

    # Task states that are final. With restart condition 'none' a task that
    # reached one of them never changes again, so polling must stop on all of
    # them and not only on 'complete' (otherwise a failed job blocks forever).
    TERMINAL_STATES = frozenset(
        ('complete', 'failed', 'rejected', 'shutdown', 'orphaned', 'remove')
    )

    def __init__(self):
        """Create the Docker client from the process environment."""
        self.docker = docker.from_env()

    @staticmethod
    def _build_command(service, service_args):
        """Return the command line executed inside the job container.

        service is the executable name; service_args is the decoded
        service_args dict and must provide 'lang' (value for -l) and 'args'
        (list of additional argument strings).
        """
        parts = [service,
                 '-i /files',
                 '-l {}'.format(service_args['lang']),
                 '-o /files/output']
        return ' '.join(parts + list(service_args['args']))

    @staticmethod
    def _build_mount(user_id, job_id):
        """Return the mount spec binding the job directory to /files (rw)."""
        source = os.path.join('/home/compute/mnt/opaque',
                              str(user_id),
                              'jobs',
                              str(job_id))
        return source + ':/files:rw'

    @classmethod
    def _terminal_status(cls, tasks):
        """Return the Status dict of the first task if it is final.

        Returns None while the task list is still empty or the first task has
        not reached one of TERMINAL_STATES yet.
        """
        if not tasks:
            return None
        status = tasks[0].get('Status') or {}
        if status.get('State') in cls.TERMINAL_STATES:
            return status
        return None

    def run(self, job):
        """Execute a job as a one-shot Swarm service and wait for its end.

        job is a job object providing the attributes service, service_args
        (JSON string with the keys 'lang', 'args' and 'version'), ressources
        (JSON string with the keys 'n_cores' and 'mem_mb'), user_id and id.

        The service is always removed from the swarm before this method
        returns or raises.

        Raises RuntimeError if the task ends in a final state other than
        'complete' (for example 'failed' or 'rejected').
        """
        # Prepare argument values needed for the service creation.
        service_args = json.loads(job.service_args)
        ressources = json.loads(job.ressources)
        _command = self._build_command(job.service, service_args)
        _constraints = ['node.role==worker']
        _image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/{}:{}'.format(
            job.service,
            service_args['version']
        )
        _labels = {'service': job.service}
        _mounts = [self._build_mount(job.user_id, job.id)]
        # The Docker SDK for Python expects the cpu_reservation value to be
        # scaled to nanos (10^9). Because the job object contains unscaled
        # (10^0) values, it must be converted.
        #
        # While the cpu_reservation value has to be in nanos, the
        # mem_reservation value must be presented in an unscaled form (bytes).
        # Because the job object provides the memory value in megabytes, it
        # is also necessary to convert the value.
        #
        # Both values are cast to int because the SDK documents them as
        # integers, while the JSON input may contain fractional numbers.
        _resources = docker.types.Resources(
            cpu_reservation=int(ressources['n_cores'] * (10 ** 9)),
            mem_reservation=int(ressources['mem_mb'] * (10 ** 6))
        )
        _restart_policy = docker.types.RestartPolicy(condition='none')

        # Debug output of the values the service is created with.
        print(_command)
        print(_image)

        # Create the service with the prepared values.
        #
        # Note: A service reserves hardware resources. In case no worker node
        #       has the required resources available (not reserved), the
        #       service gets queued by the Docker engine until a node is able
        #       to meet the requirements.
        #
        # TODO: The name argument should be set from job.id (name=...).
        #       Because there is no id generator for now, it is not set, so
        #       that the Docker engine assigns a random name.
        service = self.docker.services.create(
            _image,
            command=_command,
            constraints=_constraints,
            labels=_labels,
            mounts=_mounts,
            resources=_resources,
            restart_policy=_restart_policy
        )
        print(service.attrs)

        # Poll the service until its task reached a final state.
        #
        # Note: Because it takes some time until all data in the service
        #       object is initialized (especially the task list returned by
        #       the tasks method), an empty task list is treated like a task
        #       that is still running. The task list is fetched only once per
        #       iteration so the emptiness check and the state check refer to
        #       the same API response.
        try:
            status = self._terminal_status(service.tasks())
            while status is None:
                time.sleep(self.POLL_INTERVAL)
                service.reload()
                status = self._terminal_status(service.tasks())
        finally:
            # Remove the service from the swarm, also when polling failed, so
            # that its reserved resources are released.
            service.remove()

        state = status.get('State')
        if state != 'complete':
            raise RuntimeError(
                'Service for job {} ended in state {!r}: {}'.format(
                    job.id, state, status.get('Err')
                )
            )
|