From aa8cbda942fa974d5e07c5c01042a877c3256376 Mon Sep 17 00:00:00 2001
From: ipsum-0320
Date: Tue, 15 Oct 2024 17:28:53 +0800
Subject: [PATCH] feat: calc group score by weight

Signed-off-by: ipsum-0320

chore: update

Signed-off-by: ipsum-0320

re-test

Signed-off-by: ipsum-0320

feat: update basic score

Signed-off-by: ipsum-0320

chore: add comment

Signed-off-by: ipsum-0320

feat: calcGroupScoreForDivided

Signed-off-by: ipsum-0320

fix: lint error

Signed-off-by: ipsum-0320

chore: fix the comment

Signed-off-by: ipsum-0320

feat: resolve the comments

Signed-off-by: ipsum-0320

fix: lint error

Signed-off-by: ipsum-0320
---
 .../core/spreadconstraint/group_clusters.go  | 164 ++++++++++++++++--
 1 file changed, 152 insertions(+), 12 deletions(-)

diff --git a/pkg/scheduler/core/spreadconstraint/group_clusters.go b/pkg/scheduler/core/spreadconstraint/group_clusters.go
index 893239416855..9eb273376e40 100644
--- a/pkg/scheduler/core/spreadconstraint/group_clusters.go
+++ b/pkg/scheduler/core/spreadconstraint/group_clusters.go
@@ -17,6 +17,8 @@ limitations under the License.
 package spreadconstraint
 
 import (
+	"math"
+
 	"k8s.io/utils/ptr"
 
 	clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
@@ -40,7 +42,7 @@ type GroupClustersInfo struct {
 // ProviderInfo indicate the provider information
 type ProviderInfo struct {
 	Name              string
-	Score             int64 // the highest score in all clusters of the provider
+	Score             int64 // the comprehensive score of all clusters in the provider
 	AvailableReplicas int64
 
 	// Regions under this provider
@@ -54,7 +56,7 @@ type ProviderInfo struct {
 // RegionInfo indicate the region information
 type RegionInfo struct {
 	Name              string
-	Score             int64 // the highest score in all clusters of the region
+	Score             int64 // the comprehensive score of all clusters in the region
 	AvailableReplicas int64
 
 	// Zones under this provider
@@ -66,7 +68,7 @@ type RegionInfo struct {
 // ZoneInfo indicate the zone information
 type ZoneInfo struct {
 	Name              string
-	Score             int64 // the highest score in all clusters of the zone
+	Score             int64 // the comprehensive score of all clusters in the zone
 	AvailableReplicas int64
 
 	// Clusters under this zone, sorted by cluster.Score descending.
@@ -109,9 +111,9 @@ func groupClustersBasedTopology(
 	}
 	groupClustersInfo.calAvailableReplicasFunc = calAvailableReplicasFunc
 	groupClustersInfo.generateClustersInfo(clustersScore, rbSpec)
-	groupClustersInfo.generateZoneInfo(spreadConstraints)
-	groupClustersInfo.generateRegionInfo(spreadConstraints)
-	groupClustersInfo.generateProviderInfo(spreadConstraints)
+	groupClustersInfo.generateZoneInfo(spreadConstraints, rbSpec)
+	groupClustersInfo.generateRegionInfo(spreadConstraints, rbSpec)
+	groupClustersInfo.generateProviderInfo(spreadConstraints, rbSpec)
 
 	return groupClustersInfo
 }
@@ -128,6 +130,123 @@ func groupClustersIgnoringTopology(
 	return groupClustersInfo
 }
 
+// weightUnit is used to minimize the impact of avg(cluster.Score).
+// By multiplying by weightUnit, the unit of targetReplica becomes 1000, while avg(cluster.Score) stays within [0, 100].
+// Thus, when sorting by group score, targetReplica is considered first; only when the weighted replica terms are equal is the score compared.
+const weightUnit int64 = 1000
+
+func (info *GroupClustersInfo) calcGroupScore(
+	clusters []ClusterDetailInfo,
+	rbSpec *workv1alpha2.ResourceBindingSpec,
+	minGroups int) int64 {
+	// If the replica scheduling type is Divided, the score is calculated as follows.
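+	// targetReplica is the share of replicas each group is expected to hold:
+	// the total number of replicas divided by minGroups, rounded up.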
+	float64MinGroups := float64(minGroups)
+	targetReplica := int64(math.Ceil(float64(rbSpec.Replicas) / float64MinGroups))
+
+	// Get the minGroups of the Cluster spread constraint, if one is set.
+	var clusterMinGroups int
+	if rbSpec.Placement != nil && rbSpec.Placement.SpreadConstraints != nil {
+		for _, sc := range rbSpec.Placement.SpreadConstraints {
+			if sc.SpreadByField == policyv1alpha1.SpreadByFieldCluster {
+				clusterMinGroups = sc.MinGroups
+			}
+		}
+	}
+
+	// If the Cluster minGroups is less than the group's minGroups, raise it to the group's minGroups.
+	if clusterMinGroups < minGroups {
+		clusterMinGroups = minGroups
+	}
+	int64ClusterMinGroups := int64(clusterMinGroups)
+
+	// clusters is sorted by cluster.Score in descending order; when scores
+	// are equal, clusters with fewer available replicas come first.
+	var sumAvailableReplica int64
+	var sumScore int64
+	var validClusters int64
+	for _, cluster := range clusters {
+		sumAvailableReplica += cluster.AvailableReplicas
+		sumScore += cluster.Score
+		validClusters++
+		if validClusters >= int64ClusterMinGroups && sumAvailableReplica >= targetReplica {
+			break
+		}
+	}
+
+	// cluster.Score is either 0 or 100. To minimize the impact of Score,
+	// targetReplica is scaled by weightUnit (1000). This way, when sorting
+	// by group score, targetReplica is considered first, and only when the
+	// weighted replica terms are equal is Score compared.
+
+	// Here is an example: rbSpec.Replicas == 100, Group.minGroups == 2, and Cluster.minGroups == 1.
+	// Thus targetReplica is 50 and int64ClusterMinGroups == 2, because int64ClusterMinGroups == max(Group.minGroups, Cluster.minGroups).
+
+	// Group 1 has five clusters:
+	// -----------------------------------------------------------------------
+	// | clusterName       | member1 | member2 | member3 | member4 | member5 |
+	// -----------------------------------------------------------------------
+	// | score             |   100   |   100   |   100   |   100   |   100   |
+	// -----------------------------------------------------------------------
+	// | AvailableReplicas |    10   |    10   |    10   |    10   |    5    |
+	// -----------------------------------------------------------------------
+
+	// Group 2 has four clusters:
+	// -------------------------------------------------------------
+	// | clusterName       | member1 | member2 | member3 | member4 |
+	// -------------------------------------------------------------
+	// | score             |    0    |    0    |    0    |    0    |
+	// -------------------------------------------------------------
+	// | AvailableReplicas |    40   |    30   |    10   |    10   |
+	// -------------------------------------------------------------
+
+	// We expect Group 2 to be the better choice: it has more available
+	// replica capacity and so meets the needs of replica allocation, even
+	// though Group 1 has a higher cluster balance.
+	// Under this algorithm, Group 2's score is indeed higher than Group 1's:
+
+	// Group1's Score = 45 * 1000 + 100 = 45100
+	// Group2's Score = 50 * 1000 + 0 = 50000
+
+	// Here is another example: targetReplica is again 50 and int64ClusterMinGroups == 2.
+	// The difference is that both groups now meet the available replica requirement.
+
+	// Group 1 has five clusters:
+	// -----------------------------------------------------------------------
+	// | clusterName       | member1 | member2 | member3 | member4 | member5 |
+	// -----------------------------------------------------------------------
+	// | score             |   100   |   100   |   100   |   100   |   100   |
+	// -----------------------------------------------------------------------
+	// | AvailableReplicas |    40   |    40   |    10   |    10   |    5    |
+	// -----------------------------------------------------------------------
+
+	// Group 2 has four clusters:
+	// -------------------------------------------------------------
+	// | clusterName       | member1 | member2 | member3 | member4 |
+	// -------------------------------------------------------------
+	// | score             |    0    |    0    |    0    |    0    |
+	// -------------------------------------------------------------
+	// | AvailableReplicas |   100   |   100   |    10   |    10   |
+	// -------------------------------------------------------------
+
+	// We expect Group 1 to be the better choice: both groups can now satisfy
+	// the replica allocation requirement, but Group 1 has a higher cluster
+	// balance (even though Group 2 has more available replicas).
+	// Under this algorithm, Group 1's score is indeed higher than Group 2's:
+
+	// Group1's Score = 50 * 1000 + 100 = 50100
+	// Group2's Score = 50 * 1000 + 0 = 50000
+
+	if sumAvailableReplica < targetReplica {
+		sumAvailableReplica = sumAvailableReplica * weightUnit
+		return sumAvailableReplica + sumScore/int64(len(clusters))
+	}
+
+	targetReplica = targetReplica * weightUnit
+	return targetReplica + sumScore/validClusters
+}
+
 func (info *GroupClustersInfo) generateClustersInfo(clustersScore framework.ClusterScoreList, rbSpec *workv1alpha2.ResourceBindingSpec) {
 	var clusters []*clusterv1alpha1.Cluster
 	for _, clusterScore := range clustersScore {
@@ -153,7 +272,7 @@ func (info *GroupClustersInfo) generateClustersInfo(clustersScore framework.Clus
 	})
 }
 
-func (info *GroupClustersInfo) generateZoneInfo(spreadConstraints []policyv1alpha1.SpreadConstraint) {
+func (info *GroupClustersInfo) generateZoneInfo(spreadConstraints []policyv1alpha1.SpreadConstraint, rbSpec *workv1alpha2.ResourceBindingSpec) {
 	if !IsSpreadConstraintExisted(spreadConstraints, policyv1alpha1.SpreadByFieldZone) {
 		return
 	}
@@ -178,13 +297,20 @@
 		}
 	}
 
+	var minGroups int
+	for _, sc := range spreadConstraints {
+		if sc.SpreadByField == policyv1alpha1.SpreadByFieldZone {
+			minGroups = sc.MinGroups
+		}
+	}
+
 	for zone, zoneInfo := range info.Zones {
-		zoneInfo.Score = zoneInfo.Clusters[0].Score
+		zoneInfo.Score = info.calcGroupScore(zoneInfo.Clusters, rbSpec, minGroups)
 		info.Zones[zone] = zoneInfo
 	}
 }
 
-func (info *GroupClustersInfo) generateRegionInfo(spreadConstraints []policyv1alpha1.SpreadConstraint) {
+func (info *GroupClustersInfo) generateRegionInfo(spreadConstraints []policyv1alpha1.SpreadConstraint, rbSpec *workv1alpha2.ResourceBindingSpec) {
 	if !IsSpreadConstraintExisted(spreadConstraints, policyv1alpha1.SpreadByFieldRegion) {
 		return
 	}
@@ -212,13 +338,20 @@
 		info.Regions[region] = regionInfo
 	}
 
+	var minGroups int
+	for _, sc := range spreadConstraints {
+		if sc.SpreadByField == policyv1alpha1.SpreadByFieldRegion {
+			minGroups = sc.MinGroups
+		}
+	}
+
 	for region, regionInfo := range info.Regions {
-		regionInfo.Score = regionInfo.Clusters[0].Score
+		regionInfo.Score = info.calcGroupScore(regionInfo.Clusters, rbSpec, minGroups)
 		info.Regions[region] = regionInfo
 	}
 }
 
-func (info *GroupClustersInfo) generateProviderInfo(spreadConstraints []policyv1alpha1.SpreadConstraint) {
+func (info *GroupClustersInfo) generateProviderInfo(spreadConstraints []policyv1alpha1.SpreadConstraint, rbSpec *workv1alpha2.ResourceBindingSpec) {
 	if !IsSpreadConstraintExisted(spreadConstraints, policyv1alpha1.SpreadByFieldProvider) {
 		return
 	}
@@ -252,8 +385,15 @@
 		info.Providers[provider] = providerInfo
 	}
 
+	var minGroups int
+	for _, sc := range spreadConstraints {
+		if sc.SpreadByField == policyv1alpha1.SpreadByFieldProvider {
+			minGroups = sc.MinGroups
+		}
+	}
+
 	for provider, providerInfo := range info.Providers {
-		providerInfo.Score = providerInfo.Clusters[0].Score
+		providerInfo.Score = info.calcGroupScore(providerInfo.Clusters, rbSpec, minGroups)
 		info.Providers[provider] = providerInfo
 	}
 }
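
The standalone Go sketch below is an editor's illustration, not part of the
patch: it mirrors the arithmetic of calcGroupScore (the clusterDetail type and
the group literals are hypothetical stand-ins for ClusterDetailInfo and the
grouped clusters) so the two worked examples in the comments can be re-run.

package main

import (
	"fmt"
	"math"
)

// clusterDetail is a stand-in for ClusterDetailInfo, holding only the fields
// the score calculation reads.
type clusterDetail struct {
	score             int64
	availableReplicas int64
}

const weightUnit int64 = 1000

// calcGroupScore mirrors the patch: accumulate clusters (already sorted by
// score descending) until both the cluster minGroups and the per-group
// replica target are satisfied, then combine the weighted replica term with
// the average cluster score.
func calcGroupScore(clusters []clusterDetail, replicas int64, minGroups, clusterMinGroups int) int64 {
	targetReplica := int64(math.Ceil(float64(replicas) / float64(minGroups)))
	if clusterMinGroups < minGroups {
		clusterMinGroups = minGroups
	}

	var sumAvailableReplica, sumScore, validClusters int64
	for _, c := range clusters {
		sumAvailableReplica += c.availableReplicas
		sumScore += c.score
		validClusters++
		if validClusters >= int64(clusterMinGroups) && sumAvailableReplica >= targetReplica {
			break
		}
	}

	if sumAvailableReplica < targetReplica {
		// The group falls short of the target: weight what it can offer and
		// average the score over all of its clusters.
		return sumAvailableReplica*weightUnit + sumScore/int64(len(clusters))
	}
	// The group meets the target: weight the target itself and average the
	// score over the clusters actually consumed.
	return targetReplica*weightUnit + sumScore/validClusters
}

func main() {
	// First example: Group 1 cannot reach targetReplica=50.
	group1 := []clusterDetail{{100, 10}, {100, 10}, {100, 10}, {100, 10}, {100, 5}}
	group2 := []clusterDetail{{0, 40}, {0, 30}, {0, 10}, {0, 10}}
	fmt.Println(calcGroupScore(group1, 100, 2, 1)) // 45*1000 + 100 = 45100
	fmt.Println(calcGroupScore(group2, 100, 2, 1)) // 50*1000 + 0 = 50000

	// Second example: both groups reach the target; average score breaks the tie.
	group3 := []clusterDetail{{100, 40}, {100, 40}, {100, 10}, {100, 10}, {100, 5}}
	group4 := []clusterDetail{{0, 100}, {0, 100}, {0, 10}, {0, 10}}
	fmt.Println(calcGroupScore(group3, 100, 2, 1)) // 50*1000 + 100 = 50100
	fmt.Println(calcGroupScore(group4, 100, 2, 1)) // 50*1000 + 0 = 50000
}

The replica term dominates because weightUnit (1000) exceeds the maximum
average score (100): capacity decides first, and score only breaks ties.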