Skip to content

Commit

Permalink
feat: implement MlxResetFW to reset the FW on VF changes
Browse files Browse the repository at this point in the history
Signed-off-by: Tobias Giese <tgiese@nvidia.com>
  • Loading branch information
tobiasgiese committed Aug 5, 2024
1 parent 511b470 commit 0f1a72e
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 2 deletions.
4 changes: 3 additions & 1 deletion Dockerfile.sriov-network-config-daemon
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ RUN make _build-sriov-network-config-daemon BIN_PATH=build/_output/cmd

FROM quay.io/centos/centos:stream9
ARG MSTFLINT=mstflint
RUN ARCH_DEP_PKGS=$(if [ "$(uname -m)" != "s390x" ]; then echo -n ${MSTFLINT} ; fi) && yum -y install hwdata $ARCH_DEP_PKGS && yum clean all
# pciutils must be installed in this image: mstfwreset needs it to succeed.
# See MlxResetFW in pkg/vendors/mellanox/mellanox.go.
RUN ARCH_DEP_PKGS=$(if [ "$(uname -m)" != "s390x" ]; then echo -n ${MSTFLINT} ; fi) && yum -y install hwdata pciutils $ARCH_DEP_PKGS && yum clean all
LABEL io.k8s.display-name="sriov-network-config-daemon" \
io.k8s.description="This is a daemon that manage and config sriov network devices in Kubernetes cluster"
COPY --from=builder /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator/build/_output/cmd/sriov-network-config-daemon /usr/bin/
Expand Down
14 changes: 14 additions & 0 deletions pkg/helper/mock/mock_helper.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 10 additions & 1 deletion pkg/plugins/mellanox/mellanox_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type MellanoxPlugin struct {
helpers helper.HostHelpersInterface
}

// Package-level state shared between OnNodeStateChange and Apply.
// OnNodeStateChange re-initializes each of these on every call.
var (
	// pciAddressesToReset collects the PCI address of each interface for which
	// needReboot is set; Apply hands the list to MlxResetFW.
	pciAddressesToReset []string

	// attributesToChange maps a PCI address to the firmware attributes to
	// write for it; Apply hands the map to MlxConfigFW.
	attributesToChange map[string]mlx.MlxNic

	// mellanoxNicsStatus holds interface status entries in a two-level map.
	// NOTE(review): the meaning of the outer and inner keys is not visible
	// here — confirm against OnNodeStateChange.
	mellanoxNicsStatus map[string]map[string]sriovnetworkv1.InterfaceExt

	// mellanoxNicsSpec holds interface specs.
	// NOTE(review): presumably keyed by PCI address — confirm.
	mellanoxNicsSpec map[string]sriovnetworkv1.Interface
)
Expand Down Expand Up @@ -52,6 +53,7 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS
needDrain = false
needReboot = false
err = nil
pciAddressesToReset = []string{}
attributesToChange = map[string]mlx.MlxNic{}
mellanoxNicsStatus = map[string]map[string]sriovnetworkv1.InterfaceExt{}
mellanoxNicsSpec = map[string]sriovnetworkv1.Interface{}
Expand Down Expand Up @@ -132,6 +134,10 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS
if needReboot || changeWithoutReboot {
attributesToChange[ifaceSpec.PciAddress] = *attrs
}

if needReboot {
pciAddressesToReset = append(pciAddressesToReset, ifaceSpec.PciAddress)
}
}

// Set total VFs to 0 for mellanox interfaces with no spec
Expand Down Expand Up @@ -202,7 +208,10 @@ func (p *MellanoxPlugin) Apply() error {
return nil
}
log.Log.Info("mellanox plugin Apply()")
return p.helpers.MlxConfigFW(attributesToChange)
if err := p.helpers.MlxConfigFW(attributesToChange); err != nil {
return err
}
return p.helpers.MlxResetFW(pciAddressesToReset)
}

// nicHasExternallyManagedPFs returns true if one of the ports(interface) of the NIC is marked as externally managed
Expand Down
18 changes: 18 additions & 0 deletions pkg/vendors/mellanox/mellanox.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"strconv"
"strings"

kerrors "k8s.io/apimachinery/pkg/util/errors"
"sigs.k8s.io/controller-runtime/pkg/log"

sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
Expand Down Expand Up @@ -60,6 +61,7 @@ type MellanoxInterface interface {
GetMlxNicFwData(pciAddress string) (current, next *MlxNic, err error)

MlxConfigFW(attributesToChange map[string]MlxNic) error
MlxResetFW(pciAddresses []string) error
}

type mellanoxHelper struct {
Expand Down Expand Up @@ -141,6 +143,22 @@ func (m *mellanoxHelper) GetMellanoxBlueFieldMode(PciAddress string) (BlueFieldM
return -1, fmt.Errorf("MellanoxBlueFieldMode(): unknown device status for %s", PciAddress)
}

// MlxResetFW resets the firmware of every device listed in pciAddresses by
// running `mstfwreset -d <address> --skip_driver -l 3 -y reset` once per
// address.
//
// A failure on one device does not stop the loop; the remaining devices are
// still attempted. Every failure is logged together with the command's stderr
// and collected. The returned error is the aggregate of all failures, each
// wrapped with the PCI address it belongs to so callers can tell which device
// failed. It is nil when no reset failed.
func (m *mellanoxHelper) MlxResetFW(pciAddresses []string) error {
	log.Log.Info("mellanox-plugin resetFW()")
	var errs []error
	for _, pciAddress := range pciAddresses {
		cmdArgs := []string{"-d", pciAddress, "--skip_driver", "-l", "3", "-y", "reset"}
		log.Log.Info("mellanox-plugin: resetFW()", "cmd-args", cmdArgs)
		// mstfwreset depends on pciutils, so that package must be installed in
		// the image built from Dockerfile.sriov-network-config-daemon.
		_, stderr, err := m.utils.RunCommand("mstfwreset", cmdArgs...)
		if err != nil {
			log.Log.Error(err, "mellanox-plugin resetFW(): failed", "pci-address", pciAddress, "stderr", stderr)
			// Attach the address: the bare command error does not identify
			// the device once several failures are aggregated.
			errs = append(errs, fmt.Errorf("mstfwreset on device %s: %w", pciAddress, err))
		}
	}
	return kerrors.NewAggregate(errs)
}

func (m *mellanoxHelper) MlxConfigFW(attributesToChange map[string]MlxNic) error {
log.Log.Info("mellanox-plugin configFW()")
for pciAddr, fwArgs := range attributesToChange {
Expand Down
14 changes: 14 additions & 0 deletions pkg/vendors/mellanox/mock/mock_mellanox.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0f1a72e

Please sign in to comment.