Skip to content

Commit

Permalink
Merge pull request #230 from NVIDIA/update_of_dependencies
Browse files Browse the repository at this point in the history
Dependency from obsolete package was removed
  • Loading branch information
nvvfedorov authored Jan 11, 2024
2 parents 9febb6c + 8da097c commit 30d4ddc
Show file tree
Hide file tree
Showing 8 changed files with 438 additions and 52 deletions.
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ replace (

require (
github.com/NVIDIA/go-dcgm v0.0.0-20240108230649-3c233ee2a242
github.com/NVIDIA/gpu-monitoring-tools v0.0.0-20211102125545-5a2c58442e48
github.com/NVIDIA/go-nvml v0.12.0-1.0.20231031105836-a160364ba1cc
github.com/avast/retry-go/v4 v4.5.1
github.com/bits-and-blooms/bitset v1.12.0
github.com/gorilla/mux v1.8.1
Expand Down Expand Up @@ -75,7 +75,7 @@ require (
github.com/prometheus/procfs v0.11.1 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
golang.org/x/crypto v0.16.0 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/net v0.17.0 // indirect
golang.org/x/oauth2 v0.13.0 // indirect
golang.org/x/sync v0.5.0 // indirect
Expand Down
10 changes: 4 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,8 @@ github.com/Microsoft/go-winio v0.4.14/go.mod h1:qXqCSQ3Xa7+6tgxaGTIe4Kpcdsi+P8jB
github.com/Microsoft/hcsshim v0.0.0-20190417211021-672e52e9209d/go.mod h1:Op3hHsoHPAvb6lceZHDtd9OkTew38wNoXnJs8iY7rUg=
github.com/NVIDIA/go-dcgm v0.0.0-20240108230649-3c233ee2a242 h1:H+Md4NKlMvN/rTNCVMFqRGXAgag0dRs2NsEEIfTRReM=
github.com/NVIDIA/go-dcgm v0.0.0-20240108230649-3c233ee2a242/go.mod h1:eAZdHcOerdg1hyVoWwJ6jGQ+bxl95PfreT1S7ukI7mY=
github.com/NVIDIA/gpu-monitoring-tools v0.0.0-20211102125545-5a2c58442e48 h1:JO/JF5CBte9mvATbhoh32swu9erf07ZdLgwFj8u21UQ=
github.com/NVIDIA/gpu-monitoring-tools v0.0.0-20211102125545-5a2c58442e48/go.mod h1:oKPJa5eOTkWvlT4/Y4D8Nds44Fzmww5HUK+xwO+DwTA=
github.com/NVIDIA/gpu-monitoring-tools/bindings/go/dcgm v0.0.0-20210325210537-29b4f1784f18/go.mod h1:8qXwltEzU3idjUcVpMOv3FNgxxbDeXZPGMLyc/khWiY=
github.com/NVIDIA/go-nvml v0.12.0-1.0.20231031105836-a160364ba1cc h1:cpPqTnfDcYPZyvc55pdf+3PnHYZRolqp95HH9ORa12o=
github.com/NVIDIA/go-nvml v0.12.0-1.0.20231031105836-a160364ba1cc/go.mod h1:7ruy85eOM73muOc/I37euONSwEyFqZsv5ED9AogD4G0=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/OpenPeeDeeP/depguard v1.0.0/go.mod h1:7/4sitnI9YlQgTLLk734QlzXT8DuHVnAyztLplQjk+o=
Expand Down Expand Up @@ -368,7 +367,6 @@ github.com/gophercloud/gophercloud v0.1.0/go.mod h1:vxM41WHh5uqHVBMZHzuwNOHh8XEo
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
github.com/gorilla/mux v1.7.0/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
github.com/gorilla/mux v1.7.4/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
Expand Down Expand Up @@ -728,8 +726,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.16.0 h1:mMMrFzRSCF0GvB7Ne27XVtVAaXLrPmgPC7/v0tkwHaY=
golang.org/x/crypto v0.16.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k=
golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
Expand Down
4 changes: 4 additions & 0 deletions internal/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
`/internal`

Code intended for private use only, not for external import.
Note that this layout pattern is enforced by the Go compiler itself. See the Go 1.4 [`release notes`](https://golang.org/doc/go1.4#internalpackages) for more details.
114 changes: 114 additions & 0 deletions internal/pkg/nvmlprovider/provider.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package nvmlprovider

import (
"errors"
"fmt"
"strconv"
"strings"
"sync"

"github.com/NVIDIA/go-nvml/pkg/nvml"
"github.com/sirupsen/logrus"
)

// nvmlOnce guards the nvml.Init call made by GetMIGDeviceInfoByID so that the
// initialization closure passed to Do runs only once for this Once value.
var nvmlOnce *sync.Once = new(sync.Once)

// MIGDeviceInfo is the result produced by GetMIGDeviceInfoByID for a single
// MIG device.
type MIGDeviceInfo struct {
	// ParentUUID is the UUID of the GPU that hosts the MIG device.
	ParentUUID string
	// GPUInstanceID and ComputeInstanceID are the GPU instance and compute
	// instance identifiers of the MIG device on its parent GPU.
	GPUInstanceID, ComputeInstanceID int
}

// nvmlInitErr holds the outcome of the one-time nvml.Init call. It is written
// only inside nvmlOnce.Do and read after Do returns, so every caller (not just
// the first one) observes an initialization failure.
var nvmlInitErr error

// GetMIGDeviceInfoByID returns information about the MIG device identified by
// uuid.
//
// NVML is initialized on the first call. If that initialization fails, the
// same error is returned by this and every subsequent call, and NVML is never
// queried.
//
// Two UUID formats are supported:
//
//  1. With drivers >= R470 (470.42.01+), each MIG device is assigned a GPU UUID
//     starting with MIG-<UUID>. Such a UUID is resolved through NVML.
//  2. With drivers < R470 (e.g. R450 and R460), each MIG device is enumerated
//     by specifying the CI and the corresponding parent GI. The format follows
//     this convention: MIG-<GPU-UUID>/<GPU instance ID>/<compute instance ID>.
//     Such a UUID is parsed textually when NVML cannot resolve it.
//
// A nil *MIGDeviceInfo is returned together with a non-nil error when NVML
// initialization failed, when an NVML query on a resolved device fails, or when
// uuid matches neither format.
func GetMIGDeviceInfoByID(uuid string) (*MIGDeviceInfo, error) {
	nvmlOnce.Do(func() {
		nvmlInitErr = nil
		if ret := nvml.Init(); ret != nvml.SUCCESS {
			nvmlInitErr = errors.New(nvml.ErrorString(ret))
			logrus.Error("Can not init NVML library")
		}
	})
	if nvmlInitErr != nil {
		return nil, nvmlInitErr
	}

	device, ret := nvml.DeviceGetHandleByUUID(uuid)
	if ret != nvml.SUCCESS {
		// NVML does not know this UUID; fall back to the pre-R470 textual form.
		return parseLegacyMIGUUID(uuid)
	}

	parentDevice, ret := device.GetDeviceHandleFromMigDeviceHandle()
	if ret != nvml.SUCCESS {
		return nil, errors.New(nvml.ErrorString(ret))
	}

	parentUUID, ret := parentDevice.GetUUID()
	if ret != nvml.SUCCESS {
		return nil, errors.New(nvml.ErrorString(ret))
	}

	gi, ret := device.GetGpuInstanceId()
	if ret != nvml.SUCCESS {
		return nil, errors.New(nvml.ErrorString(ret))
	}

	ci, ret := device.GetComputeInstanceId()
	if ret != nvml.SUCCESS {
		return nil, errors.New(nvml.ErrorString(ret))
	}

	return &MIGDeviceInfo{
		ParentUUID:        parentUUID,
		GPUInstanceID:     gi,
		ComputeInstanceID: ci,
	}, nil
}

// parseLegacyMIGUUID parses a pre-R470 MIG identifier of the form
// MIG-<GPU-UUID>/<GPU instance ID>/<compute instance ID> without consulting
// NVML. It returns an error when uuid does not follow that convention.
func parseLegacyMIGUUID(uuid string) (*MIGDeviceInfo, error) {
	tokens := strings.SplitN(uuid, "-", 2)
	if len(tokens) != 2 || tokens[0] != "MIG" {
		return nil, fmt.Errorf("Unable to parse UUID as MIG device")
	}

	// The remainder must be exactly <GPU-UUID>/<GI>/<CI>.
	tokens = strings.SplitN(tokens[1], "/", 3)
	if len(tokens) != 3 || !strings.HasPrefix(tokens[0], "GPU-") {
		return nil, fmt.Errorf("Unable to parse UUID as MIG device")
	}

	gi, err := strconv.Atoi(tokens[1])
	if err != nil {
		return nil, fmt.Errorf("Unable to parse UUID as MIG device")
	}

	ci, err := strconv.Atoi(tokens[2])
	if err != nil {
		return nil, fmt.Errorf("Unable to parse UUID as MIG device")
	}

	return &MIGDeviceInfo{
		ParentUUID:        tokens[0],
		GPUInstanceID:     gi,
		ComputeInstanceID: ci,
	}, nil
}
102 changes: 102 additions & 0 deletions internal/pkg/nvmlprovider/provider_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package nvmlprovider

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestGetMIGDeviceInfoByID_When_DriverVersion_Below_R470 exercises the legacy
// MIG-<GPU-UUID>/<GI>/<CI> identifier format accepted by GetMIGDeviceInfoByID.
//
// NOTE: GetMIGDeviceInfoByID calls nvml.Init before doing any parsing, so the
// successful case can only pass where NVML initializes successfully.
func TestGetMIGDeviceInfoByID_When_DriverVersion_Below_R470(t *testing.T) {
	tests := []struct {
		name          string
		uuid          string
		expectedGPU   string
		expectedGi    int
		expectedCi    int
		expectedError bool
	}{
		{
			name:        "Successfull Parsing",
			uuid:        "MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/5",
			expectedGPU: "GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5",
			expectedGi:  1,
			expectedCi:  5,
		},
		{
			name:          "Fail, Missing MIG at the beginning of UUID",
			uuid:          "GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/5",
			expectedError: true,
		},
		{
			name:          "Fail, Missing GPU at the beginning of GPU UUID",
			uuid:          "MIG-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/5",
			expectedError: true,
		},
		{
			name:          "Fail, GI not parsable",
			uuid:          "MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/xx/5",
			expectedError: true,
		},
		{
			name:          "Fail, CI not a parsable",
			uuid:          "MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/xx",
			expectedError: true,
		},
	}

	const mismatchMsg = "MIG UUID parsed incorrectly: uuid: %v, (gpu: %v, gi: %v, ci: %v)"

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			deviceInfo, err := GetMIGDeviceInfoByID(tc.uuid)

			if tc.expectedError {
				if err == nil {
					// %+v is safe even if deviceInfo happens to be nil.
					t.Fatalf("Expected an error, but didn't get one: uuid: %v, result: %+v",
						tc.uuid, deviceInfo)
				}
				return
			}

			// On error the returned *MIGDeviceInfo is nil, so it must not be
			// dereferenced while reporting the failure.
			if err != nil {
				t.Fatalf("Unexpected error: %v, uuid: %v", err, tc.uuid)
			}
			if deviceInfo == nil {
				t.Fatalf("Got neither a result nor an error: uuid: %v", tc.uuid)
			}

			assert.Equal(t, tc.expectedGPU, deviceInfo.ParentUUID, mismatchMsg,
				tc.uuid,
				deviceInfo.ParentUUID,
				deviceInfo.GPUInstanceID,
				deviceInfo.ComputeInstanceID)
			assert.Equal(t, tc.expectedGi, deviceInfo.GPUInstanceID, mismatchMsg,
				tc.uuid,
				deviceInfo.ParentUUID,
				deviceInfo.GPUInstanceID,
				deviceInfo.ComputeInstanceID)
			assert.Equal(t, tc.expectedCi, deviceInfo.ComputeInstanceID, mismatchMsg,
				tc.uuid,
				deviceInfo.ParentUUID,
				deviceInfo.GPUInstanceID,
				deviceInfo.ComputeInstanceID)
		})
	}
}
29 changes: 29 additions & 0 deletions internal/pkg/testutils/testutils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package testutils

import (
"runtime"
"testing"
)

// RequireLinux skips the calling test unless it is running on Linux.
// It is marked as a test helper, so the skip is reported at the caller's line.
func RequireLinux(t *testing.T) {
	t.Helper()

	goos := runtime.GOOS
	if goos == "linux" {
		return
	}
	t.Skipf("Test is not supported on %q", goos)
}
Loading

0 comments on commit 30d4ddc

Please sign in to comment.