import docker
import subprocess


class Swarm:
    def __init__(self):
        self.docker = docker.from_env()
        self.checkout()

    def checkout(self):
        cpus = 0
        memory = 0
        for node in self.docker.nodes.list(filters={'role': 'worker'}):
            if node.attrs.get('Status').get('State') == 'ready':
                resources = node.attrs \
                    .get('Description') \
                    .get('Resources')
                # Default to 0 if a node does not report a value.
                cpus += resources.get('NanoCPUs') or 0
                memory += resources.get('MemoryBytes') or 0
        '''
        ' For whatever reason the Python Docker SDK provides the CPU count
        ' in nano CPUs (10^-9). Since this is not that handy, it gets
        ' converted to whole CPUs.
        '''
        cpus *= 10 ** -9
        '''
        ' For more natural handling, the memory information
        ' gets converted from bytes to megabytes.
        '''
        memory *= 10 ** -6
        self.cpus = int(cpus)
        self.memory = int(memory)
        self.available_cpus = self.cpus
        self.available_memory = self.memory

    def run(self, job):
        if self.available_cpus < job['requested_cpus'] or \
           self.available_memory < job['requested_memory']:
            print('Not enough resources available.')
            '''
            ' TODO: At this point the scheduler thinks that the job gets
            '       processed, which is not the case. So the job needs to
            '       be rescheduled and gain a new chance to get processed.
            '
            ' Note: Maybe it is a good idea to create a method that checks
            '       whether enough resources are available before the run
            '       method gets executed. This would make the TODO above
            '       obsolete.
            '''
            return
        job['status'] = 'running'
        # TODO: Push job changes to the database
        self.available_cpus -= job['requested_cpus']
        self.available_memory -= job['requested_memory']
        container_command = 'ocr' \
            + ' -i /input/{}'.format(job['id']) \
            + ' -l {}'.format(job['service_args']['lang']) \
            + ' -o /output' \
            + ' --keep-intermediates' \
            + ' --nCores {}'.format(job['requested_cpus'])
        container_image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/ocr'
        container_mount = '/media/sf_files/=/input/'
        '''
        ' Swarm mode is intended to run containers which serve a
        ' non-terminating service like a webserver. In order to process the
        ' occurring jobs it is necessary to use one-shot (terminating)
        ' containers. These one-shot containers are spawned with a program
        ' called JaaS¹ (Jobs as a Service), which is described in Alex Ellis'
        ' short article "One-shot containers on Docker Swarm"².
        '
        ' ¹ https://github.com/alexellis/jaas
        ' ² https://blog.alexellis.io/containers-on-swarm/
        '''
        cmd = ['jaas', 'run'] \
            + ['--command', container_command] \
            + ['--image', container_image] \
            + ['--mount', container_mount] \
            + ['--timeout', '86400s']
        completed_process = subprocess.run(
            cmd,
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL
        )
        self.available_cpus += job['requested_cpus']
        self.available_memory += job['requested_memory']
        if completed_process.returncode == 0:
            job['status'] = 'finished'
        else:
            job['status'] = 'failed'
        # TODO: Push job changes to the database
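

'''
' Minimal usage sketch. The job dict below is a hypothetical example: its
' keys ('id', 'status', 'requested_cpus', 'requested_memory',
' 'service_args') mirror what run() accesses, but the concrete values are
' assumptions, not part of the original scheduler. Running this requires a
' reachable Docker Swarm manager and the jaas binary on PATH.
'''
if __name__ == '__main__':
    swarm = Swarm()
    print('Swarm capacity: {} CPUs, {} MB memory'
          .format(swarm.cpus, swarm.memory))
    job = {
        'id': 42,                     # hypothetical job id
        'status': 'queued',
        'requested_cpus': 2,
        'requested_memory': 2048,     # megabytes, matching checkout()
        'service_args': {'lang': 'eng'}
    }
    swarm.run(job)
    print('Job ended with status: {}'.format(job['status']))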