-
Notifications
You must be signed in to change notification settings - Fork 2
/
gpu_handler.py
92 lines (78 loc) · 3.04 KB
/
gpu_handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import re
import os
import docker
class GPUHandler(object):
    """
    small class for handling gpu minor monitoring

    The minors present on the host are read once at construction time from
    the ``/dev/nvidia<N>`` device nodes. Which of them are assigned or free
    is re-derived from the running docker containers on every access of
    :attr:`assigned_minors` / :attr:`free_minors`.
    """

    # environment variable through which a container requests its GPUs
    _VISIBLE_DEVICES_VAR = 'NVIDIA_VISIBLE_DEVICES'

    def __init__(self, client=None):
        """
        small class for handling gpu minor monitoring
        :param client: docker client as obtained by docker.from_env();
            when omitted a new client is created via docker.from_env()
        """
        self.client = docker.from_env() if client is None else client
        self.minors = self.get_gpu_minors()
        self._assigned_minors = []
        self._free_minors = []

    @staticmethod
    def get_gpu_minors():
        """
        Returns the GPU minors available on the system
        :return: sorted list of the GPU minors available on the system
        """
        minors = []
        for dev in os.listdir("/dev"):
            # anchor the pattern so only names of the exact form
            # "nvidia<N>" are counted as GPU device nodes
            match_dt = re.match(r'nvidia(\d+)\Z', dev)
            if match_dt is not None:
                minors.append(int(match_dt.group(1)))
        # os.listdir gives no ordering guarantee; sort for stable results
        return sorted(minors)

    @property
    def assigned_minors(self):
        """
        returns currently assigned minors
        :return: currently assigned minors (each minor listed once)
        """
        self.update_minors()
        return self._assigned_minors

    @property
    def free_minors(self):
        """
        returns currently free minors
        :return: currently free minors, in the order of ``self.minors``
        """
        self.update_minors()
        return self._free_minors

    def _parse_visible_devices(self, value):
        """
        translates one NVIDIA_VISIBLE_DEVICES value into known minors
        :param value: the text after the ``=`` of the environment entry
        :return: list of minors from ``self.minors`` selected by the value
        """
        if value.strip().lower() == 'all':
            return list(self.minors)
        selected = []
        for token in value.split(','):
            try:
                minor = int(token)
            except ValueError:
                # 'none', 'void', the empty string and GPU UUIDs are not
                # minor numbers; skip them instead of failing the update.
                # NOTE: GPUs requested by UUID can therefore not be
                # detected as assigned here.
                continue
            if minor in self.minors:
                selected.append(minor)
        return selected

    def update_minors(self):
        """
        updates assigned and free minors by looking at the running containers
        :return: None
        """
        assigned_gpus = []
        # look in each running container
        for container in self.client.containers.list():
            # 'Config' and 'Env' may be absent or null in the attrs
            config = container.attrs.get('Config') or {}
            for el in config.get('Env') or []:
                name, sep, value = el.partition('=')
                # require the exact variable name, so variables that only
                # share the prefix or entries without '=' are ignored
                if not sep or name != self._VISIBLE_DEVICES_VAR:
                    continue
                for gpu_minor in self._parse_visible_devices(value):
                    # several containers may share a GPU; record it once
                    if gpu_minor not in assigned_gpus:
                        assigned_gpus.append(gpu_minor)
        self._assigned_minors = assigned_gpus
        # only the unassigned minors are free
        self._free_minors = [gpu_minor for gpu_minor in self.minors
                             if gpu_minor not in assigned_gpus]