Skip to content

Commit

Permalink
Fixing DevicePlugin upgrade from v1.x to v2.x (kubernetes-sigs#679)
Browse files Browse the repository at this point in the history
In v1.x we had 2 type of Daemonsets: DevicePlugin and ModuleLoader,
which could be distingished by the DaemonsetRole label
In v2.x, there only 1 type of daemonset, DevicePlugin, so the
DaemonsetRole labels was removed.
During DevicePlugin reconciliation, reconcile get all the device plugin
Ds based only on module name label, which means that in case there is a
v1.1 ModuleLoader DS running it will also be chosen. Once reconciler
tries to reconcile ModuleLoader DS, it will fail
This PR, updates the function that chooses the DevicePlugin DS, by
removing old ModuleLoader Ds based on Role label
  • Loading branch information
yevgeny-shnaidman authored Jan 2, 2024
1 parent 2108566 commit 37f139f
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 1 deletion.
3 changes: 3 additions & 0 deletions internal/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const (
PodType = "kmm.node.kubernetes.io/pod-type"
PodHashAnnotation = "kmm.node.kubernetes.io/last-hash"
KernelLabel = "kmm.node.kubernetes.io/kernel-version.full"
DaemonSetRole = "kmm.node.kubernetes.io/role"

WorkerPodVersionLabelPrefix = "beta.kmm.node.kubernetes.io/version-worker-pod"
DevicePluginVersionLabelPrefix = "beta.kmm.node.kubernetes.io/version-device-plugin"
Expand All @@ -21,5 +22,7 @@ const (
PublicSignDataKey = "cert"
PrivateSignDataKey = "key"

ModuleLoaderRoleLabelValue = "module-loader"

OperatorNamespaceEnvVar = "OPERATOR_NAMESPACE"
)
11 changes: 10 additions & 1 deletion internal/controllers/device_plugin_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,16 @@ func (dprh *devicePluginReconcilerHelper) getModuleDevicePluginDaemonSets(ctx co
if err := dprh.client.List(ctx, &dsList, opts...); err != nil {
return nil, fmt.Errorf("could not list DaemonSets: %v", err)
}
return dsList.Items, nil

devicePluginsList := make([]appsv1.DaemonSet, 0, len(dsList.Items))
// remove the older version module loader daemonsets
for _, ds := range dsList.Items {
if ds.GetLabels()[constants.DaemonSetRole] != constants.ModuleLoaderRoleLabelValue {
devicePluginsList = append(devicePluginsList, ds)
}
}

return devicePluginsList, nil
}

func (dprh *devicePluginReconcilerHelper) handleDevicePlugin(ctx context.Context, mod *kmmv1beta1.Module, existingDevicePluginDS []appsv1.DaemonSet) error {
Expand Down
64 changes: 64 additions & 0 deletions internal/controllers/device_plugin_reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -839,3 +839,67 @@ var _ = Describe("DevicePluginReconciler_getExistingDS", func() {
Expect(res).To(Equal(&ds))
})
})

var _ = Describe("DevicePluginReconciler_getModuleDevicePluginDaemonSets", func() {
var (
ctrl *gomock.Controller
clnt *client.MockClient
dprh devicePluginReconcilerHelper
)

BeforeEach(func() {
ctrl = gomock.NewController(GinkgoT())
clnt = client.NewMockClient(ctrl)
dprh = devicePluginReconcilerHelper{
client: clnt,
}
})

ctx := context.Background()

It("list failed", func() {
clnt.EXPECT().List(ctx, gomock.Any(), gomock.Any()).Return(fmt.Errorf("some error"))

dsList, err := dprh.getModuleDevicePluginDaemonSets(ctx, "name", "namespace")

Expect(err).ToNot(BeNil())
Expect(dsList).To(BeNil())
})

It("good flow, return only device plugin DSs, either v2 or v1", func() {
ds1 := appsv1.DaemonSet{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
constants.ModuleNameLabel: "some name",
},
},
}
ds2 := appsv1.DaemonSet{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
constants.ModuleNameLabel: "some name",
constants.DaemonSetRole: constants.ModuleLoaderRoleLabelValue,
},
},
}
ds3 := appsv1.DaemonSet{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
constants.ModuleNameLabel: "some name",
constants.DaemonSetRole: "some role",
},
},
}
clnt.EXPECT().List(ctx, gomock.Any(), gomock.Any()).DoAndReturn(
func(_ interface{}, list *appsv1.DaemonSetList, _ ...interface{}) error {
list.Items = []appsv1.DaemonSet{ds1, ds2, ds3}
return nil
},
)

dsList, err := dprh.getModuleDevicePluginDaemonSets(ctx, "name", "namespace")

Expect(err).NotTo(HaveOccurred())
Expect(dsList).To(Equal([]appsv1.DaemonSet{ds1, ds3}))
})
})

0 comments on commit 37f139f

Please sign in to comment.