mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-12-25 10:54:18 +00:00
99 lines
3.8 KiB
Python
99 lines
3.8 KiB
Python
|
import docker
|
||
|
import subprocess
|
||
|
|
||
|
|
||
|
class Swarm:
    """Minimal scheduler on top of a Docker Swarm.

    On construction it connects to the Docker daemon and inventories the
    CPU and memory resources of all ready worker nodes; run() then
    dispatches OCR jobs as one-shot containers (via the external ``jaas``
    binary) while book-keeping the remaining capacity.
    """

    def __init__(self):
        # Connect to the Docker daemon configured in the environment
        # (DOCKER_HOST etc.) and take an initial resource inventory.
        self.docker = docker.from_env()
        self.checkout()

    def checkout(self):
        """Inventory the resources of all ready worker nodes.

        Sets:
            self.cpus / self.memory: totals (whole CPUs / megabytes).
            self.available_cpus / self.available_memory: remaining
                capacity, initialised to the totals.
        """
        cpus = 0
        memory = 0
        for node in self.docker.nodes.list(filters={'role': 'worker'}):
            # Default to {} so a node without a 'Status' entry is simply
            # skipped instead of raising AttributeError.
            if node.attrs.get('Status', {}).get('State') == 'ready':
                # "... or 0" (NOT "0 or ...", which always yields the
                # right-hand side): a missing value (None) must count as
                # zero instead of raising a TypeError on "+=".
                cpus += node.attrs \
                    .get('Description') \
                    .get('Resources') \
                    .get('NanoCPUs') or 0
                memory += node.attrs \
                    .get('Description') \
                    .get('Resources') \
                    .get('MemoryBytes') or 0
        '''
        ' For whatever reason the Python Docker SDK provides the CPU count
        ' in nano (10^-9) CPUs; whilst this is not that handy, it gets
        ' converted. Integer floor division is used (instead of a float
        ' multiplication followed by int()) so rounding errors cannot
        ' truncate a whole CPU away.
        '''
        self.cpus = cpus // 10 ** 9
        '''
        ' For a more natural handling the memory information
        ' gets converted from bytes to megabytes.
        '''
        self.memory = memory // 10 ** 6
        self.available_cpus = self.cpus
        self.available_memory = self.memory

    def run(self, job):
        """Execute *job* as a one-shot container on the swarm.

        job: dict with the keys 'id', 'requested_cpus',
             'requested_memory', 'service_args' (containing 'lang') and
             'status'. 'status' is set to 'running' and finally to
             'finished' or 'failed' depending on the container exit code.
             If not enough resources are available the job is left
             untouched (see TODO below). Returns None.
        """
        if self.available_cpus < job['requested_cpus'] or \
           self.available_memory < job['requested_memory']:
            print('Not enough resources available.')
            '''
            ' TODO: At this point the scheduler thinks that the job gets
            '       processed, which apparently is not the case. So the job
            '       needs to get rescheduled and gain a new chance to get
            '       processed (next).
            '
            ' Note: Maybe it is a good idea to create a method that checks
            '       if enough resources are available before the run method
            '       gets executed. This would replace the need of the TODO
            '       mentioned above.
            '''
            return

        job['status'] = 'running'
        # TODO: Push job changes to the database
        self.available_cpus -= job['requested_cpus']
        self.available_memory -= job['requested_memory']

        container_command = 'ocr' \
            + ' -i /input/{}'.format(job['id']) \
            + ' -l {}'.format(job['service_args']['lang']) \
            + ' -o /output' \
            + ' --keep-intermediates' \
            + ' --nCores {}'.format(job['requested_cpus'])
        container_image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/ocr'
        container_mount = '/media/sf_files/=/input/'
        '''
        ' Swarm mode is intendet to run containers which are meant to serve a
        ' non terminating service like a webserver. In order to process the
        ' occuring jobs it is necessary to use one-shot (terminating)
        ' containers. These one-shot containers are spawned with a programm
        ' called JaaS¹ (Jobs as a Service), which is described in Alex Ellis'
        ' short article "One-shot containers on Docker Swarm"².
        '
        ' ¹ https://github.com/alexellis/jaas
        ' ² https://blog.alexellis.io/containers-on-swarm/
        '''
        cmd = ['jaas', 'run'] \
            + ['--command', container_command] \
            + ['--image', container_image] \
            + ['--mount', container_mount] \
            + ['--timeout', '86400s']
        # Block until the one-shot container terminates; its output is
        # discarded, only the exit code is inspected.
        completed_process = subprocess.run(
            cmd,
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL
        )

        # Hand the reserved resources back regardless of the outcome.
        self.available_cpus += job['requested_cpus']
        self.available_memory += job['requested_memory']
        if completed_process.returncode == 0:
            job['status'] = 'finished'
        else:
            job['status'] = 'failed'
        # TODO: Push job changes to the database

        return
|