Add default TSCs if not present to ensure even distribution of OSDs
When parts of the placement spec, such as tolerations or node affinity,
are defined, the ocs-operator stops applying the default placement specs,
including TSCs. Without the TSCs, the OSD distribution can become uneven.
Always adding the default TSCs ensures consistent, balanced OSD placement
across nodes.

Signed-off-by: Malay Kumar Parida <mparida@redhat.com>
malayparida2000 committed Dec 2, 2024
1 parent d8623c9 commit 739e804
Showing 1 changed file with 40 additions and 34 deletions.
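
For context on the "default TSCs" the commit message refers to: the patch below appends defaults.DaemonPlacements["osd"].TopologySpreadConstraints when a device set defines some placement but no TSCs. A minimal sketch of what such a default constraint can look like follows; the label selector and exact values are assumptions, not the operator's actual defaults.

```go
// Sketch only: an approximation of a default topology spread constraint for
// OSD pods, spreading them evenly across nodes (kubernetes.io/hostname) as a
// soft constraint so scheduling is never blocked. Values and label selector
// are assumed, not taken from the defaults package.
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

var defaultOSDSpread = corev1.TopologySpreadConstraint{
	MaxSkew:           1,                     // allow at most one extra OSD on any node
	TopologyKey:       corev1.LabelHostname,  // "kubernetes.io/hostname"
	WhenUnsatisfiable: corev1.ScheduleAnyway, // soft: prefer, but do not block scheduling
	LabelSelector: &metav1.LabelSelector{
		MatchLabels: map[string]string{"app": "rook-ceph-osd"}, // assumed OSD pod label
	},
}

func main() {
	fmt.Printf("%+v\n", defaultOSDSpread)
}
```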
74 changes: 40 additions & 34 deletions controllers/storagecluster/cephcluster.go
@@ -783,21 +783,21 @@ func newStorageClassDeviceSets(sc *ocsv1.StorageCluster) []rookCephv1.StorageCla
topologyKey := ds.TopologyKey
topologyKeyValues := []string{}

noPlacement := ds.Placement.NodeAffinity == nil && ds.Placement.PodAffinity == nil && ds.Placement.PodAntiAffinity == nil && ds.Placement.TopologySpreadConstraints == nil
noPreparePlacement := ds.PreparePlacement.NodeAffinity == nil && ds.PreparePlacement.PodAffinity == nil && ds.PreparePlacement.PodAntiAffinity == nil && ds.PreparePlacement.TopologySpreadConstraints == nil
noPlacementTsc := ds.Placement.TopologySpreadConstraints == nil
noPlacement := ds.Placement.NodeAffinity == nil && ds.Placement.PodAffinity == nil && ds.Placement.PodAntiAffinity == nil && noPlacementTsc
noPreparePlacementTsc := ds.PreparePlacement.TopologySpreadConstraints == nil
noPreparePlacement := ds.PreparePlacement.NodeAffinity == nil && ds.PreparePlacement.PodAffinity == nil && ds.PreparePlacement.PodAntiAffinity == nil && noPreparePlacementTsc

if noPlacement {
if topologyKey == "" {
topologyKey = getFailureDomain(sc)
}
if topologyKey == "" {
topologyKey = getFailureDomain(sc)
}

if topologyKey == "host" {
portable = false
}
if topologyKey == "host" {
portable = false
}

if topologyMap != nil {
topologyKey, topologyKeyValues = topologyMap.GetKeyValues(topologyKey)
}
if topologyMap != nil {
topologyKey, topologyKeyValues = topologyMap.GetKeyValues(topologyKey)
}

count, replica := countAndReplicaOf(&ds)
@@ -812,28 +812,7 @@ func newStorageClassDeviceSets(sc *ocsv1.StorageCluster) []rookCephv1.StorageCla
if noPreparePlacement {
in := getPlacement(sc, "osd-prepare")
(&in).DeepCopyInto(&preparePlacement)
}

if len(topologyKeyValues) >= getMinDeviceSetReplica(sc) {
// Hard constraints are set in OSD placement for portable volumes with rack failure
// domain as there is no node affinity in PVs. This restricts the movement of OSDs
// between failure domains.
if portable && !strings.Contains(topologyKey, "zone") {
addStrictFailureDomainTSC(&placement, topologyKey)
}
// If topologyKey is not host, append an additional topology spread constraint to the
// default preparePlacement. This ensures even distribution at the host level
// within a failure domain (zone/rack).
if noPreparePlacement {
if topologyKey != corev1.LabelHostname {
addStrictFailureDomainTSC(&preparePlacement, topologyKey)
} else {
preparePlacement.TopologySpreadConstraints[0].TopologyKey = topologyKey
}
}
}

if !noPreparePlacement {
} else {
preparePlacement = ds.PreparePlacement
}
} else if !noPlacement && noPreparePlacement {
@@ -844,6 +823,33 @@ func newStorageClassDeviceSets(sc *ocsv1.StorageCluster) []rookCephv1.StorageCla
placement = ds.Placement
}

// Add default TSCs if not set to ensure even distribution of OSDs across nodes
if !noPlacement && noPlacementTsc {
placement.TopologySpreadConstraints = append(placement.TopologySpreadConstraints, defaults.DaemonPlacements["osd"].TopologySpreadConstraints...)
}
if !noPreparePlacement && noPreparePlacementTsc {
preparePlacement.TopologySpreadConstraints = append(preparePlacement.TopologySpreadConstraints, defaults.DaemonPlacements["osd-prepare"].TopologySpreadConstraints...)
}

if len(topologyKeyValues) >= getMinDeviceSetReplica(sc) {
// Hard constraints are set in OSD placement for portable volumes with rack failure
// domain as there is no node affinity in PVs. This restricts the movement of OSDs
// between failure domains.
if noPlacementTsc && portable && !strings.Contains(topologyKey, "zone") {
addStrictFailureDomainTSC(&placement, topologyKey)
}
// If topologyKey is not host, append an additional topology spread constraint to the
// default preparePlacement. This ensures even distribution at the host level
// within a failure domain (zone/rack).
if noPreparePlacementTsc {
if topologyKey != corev1.LabelHostname {
addStrictFailureDomainTSC(&preparePlacement, topologyKey)
} else {
preparePlacement.TopologySpreadConstraints[0].TopologyKey = topologyKey
}
}
}

// Annotation crushDeviceClass ensures osd with different CRUSH device class than the one detected by Ceph
crushDeviceClass := ds.DeviceType
if ds.DeviceClass != "" {
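To summarize the behavioral change in isolation: default TSCs are appended only when the device set's placement left TopologySpreadConstraints unset, mirroring the new noPlacementTsc / noPreparePlacementTsc checks above. The helper below is an illustrative simplification (the actual code also checks that other placement fields were customized); the function and names are not part of the patch.

```go
// Illustrative helper, not part of the patch: fall back to operator defaults
// only when the device set did not define any TopologySpreadConstraints.
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

func ensureDefaultTSCs(user, defaults []corev1.TopologySpreadConstraint) []corev1.TopologySpreadConstraint {
	if user == nil {
		// No user-defined TSCs: apply the defaults so OSDs still spread
		// evenly even when other placement fields are customized.
		return append([]corev1.TopologySpreadConstraint{}, defaults...)
	}
	// User-defined TSCs take precedence; defaults are not mixed in.
	return user
}

func main() {
	defaults := []corev1.TopologySpreadConstraint{{
		MaxSkew:           1,
		TopologyKey:       corev1.LabelHostname,
		WhenUnsatisfiable: corev1.ScheduleAnyway,
	}}
	userDefined := []corev1.TopologySpreadConstraint{{
		MaxSkew:           1,
		TopologyKey:       corev1.LabelTopologyZone,
		WhenUnsatisfiable: corev1.DoNotSchedule,
	}}

	fmt.Println(len(ensureDefaultTSCs(nil, defaults)))         // 1: defaults applied
	fmt.Println(len(ensureDefaultTSCs(userDefined, defaults))) // 1: user constraint kept
}
```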
