Skip to content

Commit

Permalink
Implement RDMA subsystem mode change
Browse files Browse the repository at this point in the history
It is now possible to configure the RDMA subsystem mode using the
SR-IOV Network Operator in systemd mode.

We can't configure the RDMA subsystem in daemon mode because
it must be done on the host before any network namespace is
created.
  • Loading branch information
e0ne committed Apr 16, 2024
1 parent 7f0ac3b commit 3d19033
Show file tree
Hide file tree
Showing 16 changed files with 268 additions and 103 deletions.
4 changes: 4 additions & 0 deletions api/v1/sriovnetworkpoolconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ type SriovNetworkPoolConfigSpec struct {
// Drain will respect Pod Disruption Budgets (PDBs) such as etcd quorum guards,
// even if maxUnavailable is greater than one.
MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"`

// +kubebuilder:validation:Enum=shared;exclusive
// RDMA subsystem. Allowed value "shared", "exclusive".
RdmaMode string `json:"rdmaMode,omitempty"`
}

type OvsHardwareOffloadConfig struct {
Expand Down
15 changes: 15 additions & 0 deletions cmd/sriov-network-config-daemon/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,21 @@ func phasePre(setupLog logr.Logger, conf *systemd.SriovConfig, hostHelpers helpe
hostHelpers.TryEnableTun()
hostHelpers.TryEnableVhostNet()

if conf.RdmaMode != "" {
rdmaSubsystem, err := hostHelpers.GetRDMASubsystem()
if err != nil {
setupLog.Error(err, "failed to get RDMA subsystem mode")
return fmt.Errorf("failed to get RDMA subsystem mode: %v", err)
}
if rdmaSubsystem != conf.RdmaMode {
err = hostHelpers.SetRDMASubsystem(conf.RdmaMode)
if err != nil {
setupLog.Error(err, "failed to set RDMA subsystem mode")
return fmt.Errorf("failed to set RDMA subsystem mode: %v", err)
}
}
}

return callPlugin(setupLog, PhasePre, conf, hostHelpers)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ spec:
offload'
type: string
type: object
rdmaMode:
description: RDMA subsystem. Allowed value "shared", "exclusive".
enum:
- shared
- exclusive
type: string
type: object
status:
description: SriovNetworkPoolConfigStatus defines the observed state of
Expand Down
99 changes: 1 addition & 98 deletions controllers/drain_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,8 @@ import (

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -48,13 +45,6 @@ import (
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
)

var (
oneNode = intstr.FromInt32(1)
defaultNpcl = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{
MaxUnavailable: &oneNode,
NodeSelector: &metav1.LabelSelector{}}}
)

type DrainReconcile struct {
client.Client
Scheme *runtime.Scheme
Expand Down Expand Up @@ -345,94 +335,7 @@ func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) (
}

// findNodePoolConfig returns the SriovNetworkPoolConfig whose node selector
// matches the given node, together with the nodes that belong to that pool.
// Pools that carry an OVS hardware offload configuration are skipped, a node
// matching more than one pool is reported as an error, and a node matching no
// pool gets defaultNpcl along with every node that is not part of any pool.
//
// NOTE(review): this listing looks like a rendered diff hunk — the inline
// lookup below appears to be the removed implementation and the trailing
// utils.FindNodePoolConfig call its replacement; confirm against the repository.
func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) {
logger := log.FromContext(ctx)
logger.Info("findNodePoolConfig():")
// get all the sriov network pool configs
npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{}
err := dr.List(ctx, npcl)
if err != nil {
logger.Error(err, "failed to list sriovNetworkPoolConfig")
return nil, nil, err
}

// selectedNpcl collects the pools matching this node; nodesInPools records
// the name of every node selected by any (non hw-offload) pool.
selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{}
nodesInPools := map[string]interface{}{}

for _, npc := range npcl.Items {
// we skip hw offload objects
if npc.Spec.OvsHardwareOffloadConfig.Name != "" {
continue
}

// a missing selector is replaced by an empty LabelSelector
if npc.Spec.NodeSelector == nil {
npc.Spec.NodeSelector = &metav1.LabelSelector{}
}

selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector)
if err != nil {
logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector)
return nil, nil, err
}

if selector.Matches(labels.Set(node.Labels)) {
selectedNpcl = append(selectedNpcl, npc.DeepCopy())
}

nodeList := &corev1.NodeList{}
err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector})
if err != nil {
logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector",
"machineConfigPoolName", npc,
"nodeSelector", npc.Spec.NodeSelector)
return nil, nil, err
}

for _, nodeName := range nodeList.Items {
nodesInPools[nodeName.Name] = nil
}
}

if len(selectedNpcl) > 1 {
// don't allow the node to be part of multiple pools
err = fmt.Errorf("node is part of more then one pool")
logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl)
return nil, nil, err
} else if len(selectedNpcl) == 1 {
// found one pool for our node
logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0])
selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector)
if err != nil {
logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector)
return nil, nil, err
}

// list all the nodes that are also part of this pool and return them
nodeList := &corev1.NodeList{}
err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector})
if err != nil {
logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector)
return nil, nil, err
}

return selectedNpcl[0], nodeList.Items, nil
} else {
// in this case we get all the nodes and remove the ones that already part of any pool
logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultNpcl)
nodeList := &corev1.NodeList{}
err = dr.List(ctx, nodeList)
if err != nil {
logger.Error(err, "failed to list all the nodes")
return nil, nil, err
}

defaultNodeLists := []corev1.Node{}
for _, nodeObj := range nodeList.Items {
if _, exist := nodesInPools[nodeObj.Name]; !exist {
defaultNodeLists = append(defaultNodeLists, nodeObj)
}
}
return defaultNpcl, defaultNodeLists, nil
}
// delegate the lookup to the shared helper, passing the reconciler's client
return utils.FindNodePoolConfig(ctx, node, dr.Client)
}

// SetupWithManager sets up the controller with the Manager.
Expand Down
13 changes: 13 additions & 0 deletions controllers/sriovnetworkpoolconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,19 @@ func (r *SriovNetworkPoolConfigReconciler) Reconcile(ctx context.Context, req ct
return reconcile.Result{}, err
}

// RdmaMode could be set in systemd mode only
if instance.Spec.RdmaMode != "" {
operatorConfig := &sriovnetworkv1.SriovOperatorConfig{}
err := r.Get(ctx, types.NamespacedName{Namespace: vars.Namespace, Name: constants.DefaultConfigName}, operatorConfig)
if err != nil {
logger.Error(err, "failed to list SriovOperatorConfig")
return reconcile.Result{}, err
}
if operatorConfig.Spec.ConfigurationMode == sriovnetworkv1.DaemonConfigurationMode {
logger.Info("rdmaSpec is ignored in 'daemon' configuration mode")
}
}

// we don't need a finalizer for pools that doesn't use the ovs hardware offload feature
if instance.Spec.OvsHardwareOffloadConfig.Name == "" {
return ctrl.Result{}, nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ spec:
offload'
type: string
type: object
rdmaMode:
description: RDMA subsystem. Allowed value "shared", "exclusive".
enum:
- shared
- exclusive
type: string
type: object
status:
description: SriovNetworkPoolConfigStatus defines the observed state of
Expand Down
3 changes: 3 additions & 0 deletions deployment/sriov-network-operator/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,6 @@ rules:
- apiGroups: [ "config.openshift.io" ]
resources: [ "infrastructures" ]
verbs: [ "get", "list", "watch" ]
- apiGroups: [ "sriovnetwork.openshift.io" ]
resources: [ "sriovnetworkpoolconfigs" ]
verbs: [ "get", "list", "watch" ]
2 changes: 1 addition & 1 deletion deployment/sriov-network-operator/templates/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ spec:
- name: OVS_CNI_IMAGE
value: {{ .Values.images.ovsCni }}
- name: RDMA_CNI_IMAGE
value: { { .Values.images.rdmaCni } }
value: {{ .Values.images.rdmaCni }}
- name: SRIOV_DEVICE_PLUGIN_IMAGE
value: {{ .Values.images.sriovDevicePlugin }}
- name: NETWORK_RESOURCES_INJECTOR_IMAGE
Expand Down
3 changes: 3 additions & 0 deletions pkg/consts/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ const (
VdpaTypeVirtio = "virtio"
VdpaTypeVhost = "vhost"

RdmaSubsystemModeShared = "shared"
RdmaSubsystemModeExclusive = "exclusive"

ClusterTypeOpenshift = "openshift"
ClusterTypeKubernetes = "kubernetes"

Expand Down
16 changes: 15 additions & 1 deletion pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"sync"
"time"

corev1 "k8s.io/api/core/v1"

"golang.org/x/time/rate"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -439,7 +441,19 @@ func (dn *Daemon) nodeStateSyncHandler() error {
// When using systemd configuration we write the file
if vars.UsingSystemdMode {
log.Log.V(0).Info("nodeStateSyncHandler(): writing systemd config file to host")
systemdConfModified, err := systemd.WriteConfFile(dn.desiredNodeState)
// get node object
node := &corev1.Node{}
err := dn.client.Get(context.TODO(), client.ObjectKey{Name: vars.NodeName}, node)
if err != nil {
log.Log.Error(err, "nodeStateSyncHandler(): failed to get node object")
return err
}
netPoolConfig, _, err := utils.FindNodePoolConfig(context.Background(), node, dn.client)
if err != nil {
log.Log.Error(err, "nodeStateSyncHandler(): failed to get SriovNetworkPoolConfig for the current node")
}

systemdConfModified, err := systemd.WriteConfFile(dn.desiredNodeState, netPoolConfig)
if err != nil {
log.Log.Error(err, "nodeStateSyncHandler(): failed to write configuration file for systemd mode")
return err
Expand Down
29 changes: 29 additions & 0 deletions pkg/helper/mock/mock_helper.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions pkg/host/internal/kernel/kernel.go
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,34 @@ func (k *kernel) InstallRDMA(packageManager string) error {
return nil
}

// GetRDMASubsystem returns the RDMA subsystem mode reported by
// `/usr/bin/rdma system show`, executed through /bin/sh with the prefix
// returned by utils.GetChrootExtension. The mode is the second
// whitespace-separated field of the command output.
//
// An error is returned when the command fails and wrote to stderr, or when the
// output has fewer than two fields and therefore cannot be parsed.
func (k *kernel) GetRDMASubsystem() (string, error) {
	log.Log.Info("GetRDMASubsystem(): retrieving RDMA subsystem mode")
	chrootDefinition := utils.GetChrootExtension()

	stdout, stderr, err := k.utilsHelper.RunCommand("/bin/sh", "-c", fmt.Sprintf("%s /usr/bin/rdma system show", chrootDefinition))
	if err != nil && len(stderr) != 0 {
		log.Log.Error(err, "GetRDMASubsystem(): failed to get RDMA subsystem mode", "stdout", stdout, "stderr", stderr)
		return "", err
	}

	// Example of an output: netns shared copy-on-fork on
	fields := strings.Fields(stdout)
	// Guard the index below: empty or truncated output (e.g. the command
	// failed without writing to stderr) must not panic the caller.
	if len(fields) < 2 {
		parseErr := fmt.Errorf("unexpected output of 'rdma system show': %q", stdout)
		log.Log.Error(parseErr, "GetRDMASubsystem(): failed to parse RDMA subsystem mode", "stdout", stdout, "stderr", stderr)
		return "", parseErr
	}
	return fields[1], nil
}

// SetRDMASubsystem switches the RDMA subsystem mode by running
// `/usr/bin/rdma system set net <mode>` through /bin/sh, with the prefix
// returned by utils.GetChrootExtension.
//
// mode must be "shared" or "exclusive"; any other value is rejected with an
// error before a command is executed. An error is also returned when the
// command fails and wrote to stderr.
func (k *kernel) SetRDMASubsystem(mode string) error {
	log.Log.Info("SetRDMASubsystem(): Updating RDMA subsystem mode")

	// mode is interpolated into a shell command line below, so only the two
	// known modes are let through to avoid running arbitrary shell input.
	switch mode {
	case "shared", "exclusive":
	default:
		err := fmt.Errorf("unsupported RDMA subsystem mode %q", mode)
		log.Log.Error(err, "SetRDMASubsystem(): invalid RDMA subsystem mode")
		return err
	}

	chrootDefinition := utils.GetChrootExtension()

	stdout, stderr, err := k.utilsHelper.RunCommand("/bin/sh", "-c", fmt.Sprintf("%s /usr/bin/rdma system set net %s", chrootDefinition, mode))
	if err != nil && len(stderr) != 0 {
		log.Log.Error(err, "SetRDMASubsystem(): failed to update RDMA subsystem mode", "stdout", stdout, "stderr", stderr)
		return err
	}

	return nil
}

func (k *kernel) TriggerUdevEvent() error {
log.Log.Info("TriggerUdevEvent(): installing RDMA")

Expand Down
29 changes: 29 additions & 0 deletions pkg/host/mock/mock_host.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pkg/host/types/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ type KernelInterface interface {
EnableRDMA(conditionFilePath, serviceName, packageManager string) (bool, error)
// InstallRDMA install RDMA packages on the system
InstallRDMA(packageManager string) error
// GetRDMASubsystem returns RDMA subsystem mode
GetRDMASubsystem() (string, error)
// SetRDMASubsystem changes RDMA subsystem mode
SetRDMASubsystem(mode string) error
// EnableRDMAOnRHELMachine enable RDMA on a RHEL base system
EnableRDMAOnRHELMachine() (bool, error)
// GetOSPrettyName returns OS name
Expand Down
Loading

0 comments on commit 3d19033

Please sign in to comment.