forked from compulab/i3m-linux-daemon
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nvml-tools.c
150 lines (116 loc) · 3.53 KB
/
nvml-tools.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/*
* Copyright (C) 2016, CompuLab ltd.
* Author: Andrey Gelman <andrey.gelman@compulab.co.il>
* License: GNU GPLv2 or later, at your option
*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
#include "common.h"
#include "nvml-tools.h"
/*
 * Dynamically load the NVML library.
 * Because the NVML library is distributed along with the proprietary NVIDIA
 * graphics driver, we must be able to tolerate its absence on systems that
 * employ other drivers.
 * This is achieved by taking control of the NVML library loading process.
 */
/*
 * NVML DLL file name, as handed to dlopen() by nvml_dll_load().
 * NOTE(review): this is the unversioned name; some driver installations may
 * only provide a versioned file such as "libnvidia-ml.so.1" - confirm on the
 * target systems.
 */
#define NVML_DLL_FILE "libnvidia-ml.so"
/*
 * Load one function symbol from the DLL into the handle field of the same name.
 *
 *   dllh    - handle pointer; dllh->dll is read and dllh->symbol is written
 *   symbol  - bare identifier, used both as the field name and (stringified)
 *             as the symbol name passed to dlsym()
 *   errstr  - lvalue that receives the result of dlerror() (NULL on success)
 *   gotoerr - label jumped to when the lookup reported an error
 *
 * Any pending dlerror() state is discarded first, so that only an error
 * raised by this very dlsym() call can trigger the jump.
 */
#define load_symbol(dllh, symbol, errstr, gotoerr) do { \
    (void)dlerror(); \
    (dllh)->symbol = dlsym((dllh)->dll, #symbol); \
    (errstr) = dlerror(); \
    if ((errstr) != NULL) \
        goto gotoerr; \
} while (0)
static NvmlHandle *nvml_dll_load(void)
{
void *dll;
NvmlHandle *dllh;
char *dlerrstr;
dll = dlopen(NVML_DLL_FILE, RTLD_NOW);
if ( !dll ) {
dlerrstr = dlerror();
goto dll_out_err0;
}
dllh = (NvmlHandle *)calloc(1, sizeof(NvmlHandle));
dllh->dll = dll;
load_symbol(dllh, nvmlInit, dlerrstr, dll_out_err1);
load_symbol(dllh, nvmlShutdown, dlerrstr, dll_out_err1);
load_symbol(dllh, nvmlSystemGetDriverVersion, dlerrstr, dll_out_err1);
load_symbol(dllh, nvmlSystemGetNVMLVersion, dlerrstr, dll_out_err1);
load_symbol(dllh, nvmlDeviceGetCount, dlerrstr, dll_out_err1);
load_symbol(dllh, nvmlDeviceGetHandleByIndex, dlerrstr, dll_out_err1);
load_symbol(dllh, nvmlDeviceGetName, dlerrstr, dll_out_err1);
load_symbol(dllh, nvmlDeviceGetTemperature, dlerrstr, dll_out_err1);
load_symbol(dllh, nvmlDeviceGetPowerManagementMode, dlerrstr, dll_out_err1);
load_symbol(dllh, nvmlDeviceGetPowerManagementLimit, dlerrstr, dll_out_err1);
load_symbol(dllh, nvmlDeviceGetPowerManagementLimitConstraints, dlerrstr, dll_out_err1);
load_symbol(dllh, nvmlDeviceGetPowerManagementDefaultLimit, dlerrstr, dll_out_err1);
load_symbol(dllh, nvmlDeviceSetPersistenceMode, dlerrstr, dll_out_err1);
load_symbol(dllh, nvmlDeviceSetPowerManagementLimit, dlerrstr, dll_out_err1);
return dllh;
dll_out_err1:
dlclose(dll);
free(dllh);
dll_out_err0:
sloge("%s", dlerrstr);
return NULL;
}
/*
 * Undo nvml_dll_load(): close the shared library referenced by the handle,
 * then release the handle structure itself.
 */
static void nvml_dll_cleanup(NvmlHandle *dllh)
{
    void *library = dllh->dll;

    /* The library pointer is fetched before the structure holding it is freed. */
    dlclose(library);
    free(dllh);
}
/*
* NVML library tools.
* Reference:
* NVML API Reference Manual (https://developer.nvidia.com/nvidia-management-library-nvml)
*/
/* Index passed to nvmlDeviceGetHandleByIndex() by nvml_init() to pick the monitored GPU. */
#define NVML_DEVICE_DEFAULT_INDEX 0
/*
 * Shut NVML down and unload the library.
 *
 *   status - not used by this function.
 *            NOTE(review): the (int, void *) signature looks like an
 *            on_exit()-style callback - confirm against the caller.
 *   _dllh  - NvmlHandle pointer as returned by nvml_init(). NULL (which is
 *            what nvml_init() returns on failure) is accepted and makes the
 *            call a no-op.
 */
void nvml_cleanup(int status, void *_dllh)
{
    NvmlHandle *dllh = _dllh;

    (void)status;

    if (dllh == NULL)
        return;

    dllh->nvmlShutdown();
    nvml_dll_cleanup(dllh);
}
/*
 * Load the NVML library, initialize it and bind to the GPU at
 * NVML_DEVICE_DEFAULT_INDEX.
 *
 * Returns a handle whose 'device' field holds the selected GPU, or NULL on
 * any failure (library not loadable, NVML initialization failure, no GPU
 * present, device handle not obtainable). Failures are logged with sloge();
 * everything acquired up to the failure point is released before returning.
 * When more than one GPU is present a warning is logged and only the default
 * one is used.
 */
NvmlHandle *nvml_init(void)
{
    nvmlReturn_t err;
    unsigned int count = 0;
    NvmlHandle *dllh;

    dllh = nvml_dll_load();
    if (dllh == NULL)
        goto nvml_out_err0;

    err = dllh->nvmlInit();
    if (err != NVML_SUCCESS) {
        sloge("nvml: could not initialize NVML: %d", (int)err);
        goto nvml_out_err1;
    }

    err = dllh->nvmlDeviceGetCount(&count);
    if (err != NVML_SUCCESS) {
        sloge("nvml: could not get device count: %d", (int)err);
        goto nvml_out_err2;
    }

    if (count == 0) {
        sloge("nvml: no GPU card present");
        goto nvml_out_err2;
    }
    else if (count > 1) {
        /* count is unsigned, so it must be printed with %u */
        slogw("nvml: %u GPU cards present, however, only one will be monitored", count);
    }

    err = dllh->nvmlDeviceGetHandleByIndex(NVML_DEVICE_DEFAULT_INDEX, &dllh->device);
    if (err != NVML_SUCCESS) {
        sloge("nvml: could not get device handle: %d", (int)err);
        goto nvml_out_err2;
    }

    return dllh;

nvml_out_err2:
    /* NVML was initialized successfully: shut it down before unloading. */
    dllh->nvmlShutdown();
nvml_out_err1:
    nvml_dll_cleanup(dllh);
nvml_out_err0:
    return NULL;
}