nopaque/app/swarm.py

import docker
import subprocess


class Swarm:
    def __init__(self):
        self.docker = docker.from_env()
        self.checkout()
    def checkout(self):
        cpus = 0
        memory = 0
        for node in self.docker.nodes.list(filters={'role': 'worker'}):
            if node.attrs.get('Status').get('State') == 'ready':
                cpus += node.attrs \
                    .get('Description') \
                    .get('Resources') \
                    .get('NanoCPUs') or 0
                memory += node.attrs \
                    .get('Description') \
                    .get('Resources') \
                    .get('MemoryBytes') or 0
        '''
        ' The Python Docker SDK provides the CPU count in NanoCPUs
        ' (10^-9 CPUs). Since this is not that handy, it gets converted
        ' to whole CPUs.
        '''
        cpus *= 10 ** -9
        '''
        ' For a more natural handling, the memory information
        ' gets converted from bytes to megabytes.
        '''
        memory *= 10 ** -6
        self.cpus = int(cpus)
        self.memory = int(memory)
        self.available_cpus = self.cpus
        self.available_memory = self.memory
    def run(self, job):
        if self.available_cpus < job['requested_cpus'] or \
                self.available_memory < job['requested_memory']:
            print('Not enough resources available.')
            '''
            ' TODO: At this point the scheduler thinks that the job gets
            ' processed, which apparently is not the case. So the job
            ' needs to be rescheduled and get a new chance to be
            ' processed (next).
            '
            ' Note: Maybe it is a good idea to create a method that checks
            ' whether enough resources are available before the run method
            ' gets executed. This would remove the need for the TODO
            ' mentioned above. A sketch of such a check follows after the
            ' class.
            '''
            return
        job['status'] = 'running'
        # TODO: Push job changes to the database
        self.available_cpus -= job['requested_cpus']
        self.available_memory -= job['requested_memory']
        container_command = 'ocr' \
            + ' -i /input/{}'.format(job['id']) \
            + ' -l {}'.format(job['service_args']['lang']) \
            + ' -o /output' \
            + ' --keep-intermediates' \
            + ' --nCores {}'.format(job['requested_cpus'])
        container_image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/ocr'
        container_mount = '/media/sf_files/=/input/'
        '''
        ' Swarm mode is intended to run containers which are meant to serve
        ' a non-terminating service like a webserver. In order to process
        ' the occurring jobs it is necessary to use one-shot (terminating)
        ' containers. These one-shot containers are spawned with a program
        ' called JaaS¹ (Jobs as a Service), which is described in Alex
        ' Ellis' short article "One-shot containers on Docker Swarm"².
        '
        ' ¹ https://github.com/alexellis/jaas
        ' ² https://blog.alexellis.io/containers-on-swarm/
        '''
        cmd = ['jaas', 'run'] \
            + ['--command', container_command] \
            + ['--image', container_image] \
            + ['--mount', container_mount] \
            + ['--timeout', '86400s']
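        # For illustration only, with hypothetical job values (id=42,
        # lang='eng', requested_cpus=4), the assembled call would be:
        #   jaas run
        #     --command 'ocr -i /input/42 -l eng -o /output
        #       --keep-intermediates --nCores 4'
        #     --image gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/ocr
        #     --mount /media/sf_files/=/input/
        #     --timeout 86400s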
        completed_process = subprocess.run(
            cmd,
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL
        )
        self.available_cpus += job['requested_cpus']
        self.available_memory += job['requested_memory']
        if completed_process.returncode == 0:
            job['status'] = 'finished'
        else:
            job['status'] = 'failed'
        # TODO: Push job changes to the database
        return
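

'''
' A minimal sketch of the resource check suggested in the TODO note inside
' run(). This is a hypothetical illustration, not part of the original
' implementation: the function name and its standalone form are
' assumptions. A scheduler could call it before run() and reschedule the
' job when it returns False.
'''
def has_capacity(swarm, job):
    # True if the swarm currently has enough free CPUs and memory to
    # satisfy the job's requested resources.
    return (swarm.available_cpus >= job['requested_cpus']
            and swarm.available_memory >= job['requested_memory'])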