Skip to content

Commit

Permalink
chore: add feature flag for mellanox fw reset
Browse files Browse the repository at this point in the history
To not enable the new feature by default we want to add a feature flag first.

Signed-off-by: Tobias Giese <tgiese@nvidia.com>
  • Loading branch information
tobiasgiese committed Aug 5, 2024
1 parent 0f1a72e commit 08b524e
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 4 deletions.
15 changes: 15 additions & 0 deletions cmd/sriov-network-config-daemon/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

"github.com/spf13/cobra"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
Expand All @@ -41,6 +42,7 @@ import (
snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/daemon"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper"
snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms"
Expand Down Expand Up @@ -276,6 +278,18 @@ func runStartCmd(cmd *cobra.Command, args []string) error {
}
go nodeWriter.Run(stopCh, refreshCh, syncCh)

// Init feature gates once to prevent race conditions.
defaultConfig := &sriovnetworkv1.SriovOperatorConfig{}
err = kClient.Get(context.Background(), types.NamespacedName{Namespace: vars.Namespace, Name: consts.DefaultConfigName}, defaultConfig)
if err != nil {
log.Log.Error(err, "Failed to get default SriovOperatorConfig object")
return err
}
featureGates := featuregate.New()
featureGates.Init(defaultConfig.Spec.FeatureGates)
vars.MlxPluginFwReset = featureGates.IsEnabled(consts.MellanoxFirmwareResetFeatureGate)
log.Log.Info("Enabled featureGates", "featureGates", featureGates.String())

setupLog.V(0).Info("Starting SriovNetworkConfigDaemon")
err = daemon.New(
kClient,
Expand All @@ -288,6 +302,7 @@ func runStartCmd(cmd *cobra.Command, args []string) error {
syncCh,
refreshCh,
eventRecorder,
featureGates,
startOpts.disabledPlugins,
).Run(stopCh, exitCh)
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions pkg/consts/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,9 @@ const (
// ManageSoftwareBridgesFeatureGate: enables management of software bridges by the operator
ManageSoftwareBridgesFeatureGate = "manageSoftwareBridges"

// MellanoxFirmwareResetFeatureGate: enables the firmware reset via mstfwreset before a reboot
MellanoxFirmwareResetFeatureGate = "mellanoxFirmwareReset"

// The path to the file on the host filesystem that contains the IB GUID distribution for IB VFs
InfinibandGUIDConfigFilePath = SriovConfBasePath + "/infiniband/guids"
)
Expand Down
14 changes: 14 additions & 0 deletions pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"math/rand"
"os/exec"
"reflect"
"sync"
"time"

Expand All @@ -23,6 +24,7 @@ import (
snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned"
sninformer "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/informers/externalversions"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper"
snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms"
Expand Down Expand Up @@ -82,6 +84,8 @@ type Daemon struct {
workqueue workqueue.RateLimitingInterface

eventRecorder *EventRecorder

featureGate featuregate.FeatureGate
}

func New(
Expand All @@ -95,6 +99,7 @@ func New(
syncCh <-chan struct{},
refreshCh chan<- Message,
er *EventRecorder,
featureGates featuregate.FeatureGate,
disabledPlugins []string,
) *Daemon {
return &Daemon{
Expand All @@ -113,6 +118,7 @@ func New(
&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(updateDelay), 1)},
workqueue.NewItemExponentialFailureRateLimiter(1*time.Second, maxUpdateBackoff)), "SriovNetworkNodeState"),
eventRecorder: er,
featureGate: featureGates,
disabledPlugins: disabledPlugins,
}
}
Expand Down Expand Up @@ -286,6 +292,7 @@ func (dn *Daemon) operatorConfigAddHandler(obj interface{}) {
}

func (dn *Daemon) operatorConfigChangeHandler(old, new interface{}) {
oldCfg := old.(*sriovnetworkv1.SriovOperatorConfig)
newCfg := new.(*sriovnetworkv1.SriovOperatorConfig)
if newCfg.Namespace != vars.Namespace || newCfg.Name != consts.DefaultConfigName {
log.Log.V(2).Info("unsupported SriovOperatorConfig", "namespace", newCfg.Namespace, "name", newCfg.Name)
Expand All @@ -299,6 +306,13 @@ func (dn *Daemon) operatorConfigChangeHandler(old, new interface{}) {
dn.disableDrain = newDisableDrain
log.Log.Info("Set Disable Drain", "value", dn.disableDrain)
}

if !reflect.DeepEqual(oldCfg.Spec.FeatureGates, newCfg.Spec.FeatureGates) {
dn.featureGate.Init(newCfg.Spec.FeatureGates)
log.Log.Info("Updated featureGates", "featureGates", dn.featureGate.String())
}

vars.MlxPluginFwReset = dn.featureGate.IsEnabled(consts.MellanoxFirmwareResetFeatureGate)
}

func (dn *Daemon) nodeStateSyncHandler() error {
Expand Down
9 changes: 6 additions & 3 deletions pkg/daemon/daemon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log/zap"

sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock"
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/fakefilesystem"

snclient "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned"
snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned/fake"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate"
mock_helper "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper/mock"
mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift"
plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/fake"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/generic"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/fakefilesystem"
)

func TestConfigDaemon(t *testing.T) {
Expand Down Expand Up @@ -151,6 +151,8 @@ var _ = Describe("Config Daemon", func() {
vendorHelper.EXPECT().PrepareNMUdevRule([]string{"0x1014", "0x154c"}).Return(nil).AnyTimes()
vendorHelper.EXPECT().PrepareVFRepUdevRule().Return(nil).AnyTimes()

featureGates := featuregate.New()

sut = New(
kClient,
snclient,
Expand All @@ -162,6 +164,7 @@ var _ = Describe("Config Daemon", func() {
syncCh,
refreshCh,
er,
featureGates,
nil,
)

Expand Down
6 changes: 5 additions & 1 deletion pkg/plugins/mellanox/mellanox_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper"
plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
mlx "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vendors/mellanox"
)

Expand Down Expand Up @@ -211,7 +212,10 @@ func (p *MellanoxPlugin) Apply() error {
if err := p.helpers.MlxConfigFW(attributesToChange); err != nil {
return err
}
return p.helpers.MlxResetFW(pciAddressesToReset)
if vars.MlxPluginFwReset {
return p.helpers.MlxResetFW(pciAddressesToReset)
}
return nil
}

// nicHasExternallyManagedPFs returns true if one of the ports(interface) of the NIC is marked as externally managed
Expand Down
3 changes: 3 additions & 0 deletions pkg/vars/vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ var (
// ManageSoftwareBridges global variable which reflects state of manageSoftwareBridges feature
ManageSoftwareBridges = false

// MlxPluginFwReset global variable enables mstfwreset before rebooting a node on VF changes
MlxPluginFwReset = false

// FilesystemRoot used by test to mock interactions with filesystem
FilesystemRoot = ""

Expand Down

0 comments on commit 08b524e

Please sign in to comment.