From 02d0aa6433380a8212d7f845f995ef1c8f9861e0 Mon Sep 17 00:00:00 2001 From: Albin Severinson Date: Wed, 28 Jun 2023 10:47:42 +0100 Subject: [PATCH] Add missing config --- config/armada/config.yaml | 20 ++++++------- config/scheduler/config.yaml | 18 ++++++------ internal/armada/configuration/types.go | 23 +++++---------- internal/armada/server/lease.go | 3 ++ internal/scheduler/context/context.go | 40 +++++++++++++++++--------- internal/scheduler/scheduling_algo.go | 3 ++ 6 files changed, 58 insertions(+), 49 deletions(-) diff --git a/config/armada/config.yaml b/config/armada/config.yaml index 90b8af17421..fbd15ba5476 100644 --- a/config/armada/config.yaml +++ b/config/armada/config.yaml @@ -31,6 +31,13 @@ eventsApiRedis: poolSize: 1000 scheduling: enableAssertions: true + fairnessModel: "AssetFairness" + dominantResourceFairnessResourcesToConsider: + - "cpu" + - "memory" + - "nvidia.com/gpu" + resourceScarcity: + cpu: 1.0 preemption: nodeEvictionProbability: 1.0 nodeOversubscriptionEvictionProbability: 1.0 @@ -43,8 +50,8 @@ scheduling: priority: 1000 preemptible: false maximumResourceFractionPerQueue: - memory: 0.99 - cpu: 0.99 + memory: 1.0 + cpu: 1.0 armada-preemptible: priority: 1000 preemptible: true @@ -54,7 +61,7 @@ scheduling: maxExtraNodesToConsider: 1 maximumResourceFractionToSchedule: memory: 1.0 - cpu: 1.0 + cpu: 1.0 maxJobSchedulingContextsPerExecutor: 10000 lease: expireAfter: 15m @@ -69,11 +76,6 @@ scheduling: value: "true" effect: "NoSchedule" defaultJobTolerationsByPriorityClass: - "": - - key: "armadaproject.io/pc-armada-default" - operator: "Equal" - value: "true" - effect: "NoSchedule" armada-default: - key: "armadaproject.io/pc-armada-default" operator: "Equal" @@ -85,8 +87,6 @@ scheduling: value: "true" effect: "NoSchedule" maxRetries: 5 - resourceScarcity: - cpu: 1.0 maxPodSpecSizeBytes: 65535 minJobResources: memory: 1Mi diff --git a/config/scheduler/config.yaml b/config/scheduler/config.yaml index c05b6e1ebf4..13a095be02f 100644 --- a/config/scheduler/config.yaml +++ b/config/scheduler/config.yaml @@ -49,6 +49,13 @@ grpc: scheduling: executorTimeout: 10m enableAssertions: true + fairnessModel: "AssetFairness" + dominantResourceFairnessResourcesToConsider: + - "cpu" + - "memory" + - "nvidia.com/gpu" + resourceScarcity: + cpu: 1.0 preemption: alwaysAttemptScheduling: false enabled: true @@ -60,8 +67,8 @@ scheduling: priority: 1000 preemptible: false maximumResourceFractionPerQueue: - memory: 0.99 - cpu: 0.99 + memory: 1.0 + cpu: 1.0 armada-preemptible: priority: 1000 preemptible: true @@ -85,11 +92,6 @@ scheduling: value: "true" effect: "NoSchedule" defaultJobTolerationsByPriorityClass: - "": - - key: "armadaproject.io/pc-armada-default" - operator: "Equal" - value: "true" - effect: "NoSchedule" armada-default: - key: "armadaproject.io/pc-armada-default" operator: "Equal" @@ -101,8 +103,6 @@ scheduling: value: "true" effect: "NoSchedule" maxRetries: 5 - resourceScarcity: - cpu: 1.0 indexedResources: - name: "cpu" resolution: "100m" diff --git a/internal/armada/configuration/types.go b/internal/armada/configuration/types.go index a9113a117ee..3c180e499df 100644 --- a/internal/armada/configuration/types.go +++ b/internal/armada/configuration/types.go @@ -114,9 +114,9 @@ type SchedulingConfig struct { // Maximum number of times a job is retried before considered failed. MaxRetries uint // Controls how fairness is calculated. Can be either AssetFairness or DominantResourceFairness. 
- FairnessType FairnessType - // Used to convert one resource into another when using DominantResourceFairness. - FairnessResourceMapping []ResourceMapping + FairnessModel FairnessModel + // List of resource names, e.g., []string{"cpu", "memory"}, to consider when computing DominantResourceFairness. + DominantResourceFairnessResourcesToConsider []string // Weights used to compute fair share when using AssetFairness. // Overrides dynamic scarcity calculation if provided. // Applies to both the new and old scheduler. @@ -191,7 +191,7 @@ type SchedulingConfig struct { AlwaysAttemptScheduling bool } -// FairnessType controls how fairness is computed. +// FairnessModel controls how fairness is computed. // In particular, each queue has a cost associated with it and the next job to attempt to schedule // is taken from the queue with the smallest cost associated with it. // @@ -202,22 +202,13 @@ type SchedulingConfig struct { // // If DominantResourceFairness, the cost associated with a queue is // max("CPU allocation" / "CPU capacity", "memory allocation" / "mamory capacity", ...). -type FairnessType string +type FairnessModel string const ( - AssertFairness FairnessType = "AssertFairness" - DominantResourceFairness FairnessType = "DominantResourceFairness" + AssetFairness FairnessModel = "AssetFairness" + DominantResourceFairness FairnessModel = "DominantResourceFairness" ) -// ResourceMapping describes a mapping from one resource type to another. Used when computing fairness. -// E.g., ResourceMapping{"nvidia.com/mig-1g.10gb", "nvidia.com/gpu", 1/8} -// indicates 1 unit of "nvidia.com/mig-1g.10gb" should be trated as 1/8 unit of "nvidia.com/gpu". -type ResourceMapping struct { - Source string - Target string - Multiplier float64 -} - type IndexedResource struct { // Resource name. E.g., "cpu", "memory", or "nvidia.com/gpu". Name string diff --git a/internal/armada/server/lease.go b/internal/armada/server/lease.go index ca7bc392709..4cd441a2d25 100644 --- a/internal/armada/server/lease.go +++ b/internal/armada/server/lease.go @@ -468,6 +468,9 @@ func (q *AggregatedQueueServer) getJobs(ctx context.Context, req *api.StreamingL q.schedulingConfig.ResourceScarcity, schedulerobjects.ResourceList{Resources: totalCapacity}, ) + if q.schedulingConfig.FairnessModel == configuration.DominantResourceFairness { + sctx.EnableDominantResourceFairness(q.schedulingConfig.DominantResourceFairnessResourcesToConsider) + } for queue, priorityFactor := range priorityFactorByQueue { weight := 1 / priorityFactor if err := sctx.AddQueueSchedulingContext(queue, weight, allocatedByQueueAndPriorityClassForPool[queue]); err != nil { diff --git a/internal/scheduler/context/context.go b/internal/scheduler/context/context.go index 80f606d5be4..bd8d6e84dee 100644 --- a/internal/scheduler/context/context.go +++ b/internal/scheduler/context/context.go @@ -36,11 +36,10 @@ type SchedulingContext struct { // Default priority class. DefaultPriorityClass string // Determines how fairness is computed. - FairnessType configuration.FairnessType - // Used to convert one resource into another when computing fair share. - // Only applies to DominantResourceFairness. - FairnessResourceMappingBySourceResource map[string]configuration.ResourceMapping - // Weights used when computing total resource usage. + FairnessModel configuration.FairnessModel + // Resources considered when computing DominantResourceFairness. + DominantResourceFairnessResourcesToConsider []string + // Weights used when computing AssetFairness. 
ResourceScarcity map[string]float64 // Per-queue scheduling contexts. QueueSchedulingContexts map[string]*QueueSchedulingContext @@ -83,7 +82,7 @@ func NewSchedulingContext( Pool: pool, PriorityClasses: priorityClasses, DefaultPriorityClass: defaultPriorityClass, - FairnessType: configuration.AssertFairness, + FairnessModel: configuration.AssetFairness, ResourceScarcity: resourceScarcity, QueueSchedulingContexts: make(map[string]*QueueSchedulingContext), TotalResources: totalResources.DeepCopy(), @@ -95,6 +94,11 @@ func NewSchedulingContext( } } +func (sctx *SchedulingContext) EnableDominantResourceFairness(dominantResourceFairnessResourcesToConsider []string) { + sctx.FairnessModel = configuration.DominantResourceFairness + sctx.DominantResourceFairnessResourcesToConsider = dominantResourceFairnessResourcesToConsider +} + func (sctx *SchedulingContext) SchedulingKeyFromLegacySchedulerJob(job interfaces.LegacySchedulerJob) schedulerobjects.SchedulingKey { var priority int32 if priorityClass, ok := sctx.PriorityClasses[job.GetPriorityClassName()]; ok { @@ -521,28 +525,36 @@ func (qctx *QueueSchedulingContext) TotalCostForQueue() float64 { // TotalCostForQueueWithAllocation returns the cost for which this queue should be penalised when computing fairness, // if the total allocation of this queue is given by allocated. func (qctx *QueueSchedulingContext) TotalCostForQueueWithAllocation(allocated schedulerobjects.ResourceList) float64 { - switch qctx.SchedulingContext.FairnessType { - case configuration.AssertFairness: + switch qctx.SchedulingContext.FairnessModel { + case configuration.AssetFairness: return qctx.assetFairnessCostWithAllocation(allocated) case configuration.DominantResourceFairness: return qctx.dominantResourceFairnessCostWithAllocation(allocated) default: - panic(fmt.Sprintf("unknown fairness type: %s", qctx.SchedulingContext.FairnessType)) + panic(fmt.Sprintf("unknown fairness type: %s", qctx.SchedulingContext.FairnessModel)) } } func (qctx *QueueSchedulingContext) assetFairnessCostWithAllocation(allocated schedulerobjects.ResourceList) float64 { + if len(qctx.SchedulingContext.ResourceScarcity) == 0 { + panic("ResourceScarcity is not set") + } return float64(allocated.AsWeightedMillis(qctx.SchedulingContext.ResourceScarcity)) / qctx.Weight } func (qctx *QueueSchedulingContext) dominantResourceFairnessCostWithAllocation(allocated schedulerobjects.ResourceList) float64 { + if len(qctx.SchedulingContext.DominantResourceFairnessResourcesToConsider) == 0 { + panic("DominantResourceFairnessResourcesToConsider is not set") + } var cost float64 - for t, q := range allocated.Resources { - totalq := qctx.SchedulingContext.TotalResources.Get(t) - if totalq.Cmp(resource.Quantity{}) == 0 { - totalq.SetMilli(1) + for _, t := range qctx.SchedulingContext.DominantResourceFairnessResourcesToConsider { + capacity := qctx.SchedulingContext.TotalResources.Get(t) + if capacity.Equal(resource.Quantity{}) { + // Ignore any resources with zero capacity. 
+ continue } - tcost := float64(q.MilliValue()) / float64(totalq.MilliValue()) + q := allocated.Get(t) + tcost := float64(q.MilliValue()) / float64(capacity.MilliValue()) if tcost > cost { cost = tcost } diff --git a/internal/scheduler/scheduling_algo.go b/internal/scheduler/scheduling_algo.go index 29b98e97d1a..e4cd299b8c0 100644 --- a/internal/scheduler/scheduling_algo.go +++ b/internal/scheduler/scheduling_algo.go @@ -322,6 +322,9 @@ func (l *FairSchedulingAlgo) scheduleOnExecutor( l.config.ResourceScarcity, accounting.totalCapacity, ) + if l.config.FairnessModel == configuration.DominantResourceFairness { + sctx.EnableDominantResourceFairness(l.config.DominantResourceFairnessResourcesToConsider) + } for queue, priorityFactor := range accounting.priorityFactorByQueue { var allocatedByPriorityClass schedulerobjects.QuantityByTAndResourceType[string] if allocatedByQueueAndPriorityClass := accounting.allocationByPoolAndQueueAndPriorityClass[executor.Pool]; allocatedByQueueAndPriorityClass != nil {
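
For context when reviewing the two cost functions touched above, here is a minimal, standalone Go sketch of what each fairness model charges a queue. It deliberately uses plain float64 maps and hypothetical names (queue, assetFairnessCost, dominantResourceFairnessCost) rather than the real schedulerobjects.ResourceList / AsWeightedMillis machinery, and it assumes the dominant-resource cost is divided by the queue weight in the same way as the asset-fairness cost (that return statement falls outside the hunk shown). Treat it as an illustration of intent, not the implementation.

package main

import "fmt"

// queue is a simplified stand-in for the scheduler's per-queue state; it is not
// the real QueueSchedulingContext, just enough to illustrate the two cost models.
type queue struct {
	weight    float64            // 1 / priorityFactor, as set via AddQueueSchedulingContext
	allocated map[string]float64 // resource name -> amount currently allocated to the queue
}

// assetFairnessCost mirrors the intent of assetFairnessCostWithAllocation:
// a scarcity-weighted sum of the queue's allocation, divided by the queue weight.
// Resources missing from resourceScarcity contribute nothing.
func assetFairnessCost(q queue, resourceScarcity map[string]float64) float64 {
	sum := 0.0
	for name, amount := range q.allocated {
		sum += resourceScarcity[name] * amount
	}
	return sum / q.weight
}

// dominantResourceFairnessCost mirrors the intent of
// dominantResourceFairnessCostWithAllocation: the largest allocation/capacity
// ratio over the configured resources, skipping zero-capacity resources as the
// patch does. Dividing by the queue weight here is an assumption; the DRF
// return statement is not part of the hunk shown above.
func dominantResourceFairnessCost(q queue, capacity map[string]float64, resourcesToConsider []string) float64 {
	cost := 0.0
	for _, name := range resourcesToConsider {
		if capacity[name] == 0 {
			// Ignore resources with zero capacity.
			continue
		}
		if c := q.allocated[name] / capacity[name]; c > cost {
			cost = c
		}
	}
	return cost / q.weight
}

func main() {
	capacity := map[string]float64{"cpu": 1000, "memory": 4096, "nvidia.com/gpu": 8}
	q := queue{
		weight:    1.0,
		allocated: map[string]float64{"cpu": 100, "memory": 256, "nvidia.com/gpu": 4},
	}

	// With resourceScarcity {cpu: 1.0}, only CPU contributes: cost = 100.
	fmt.Println(assetFairnessCost(q, map[string]float64{"cpu": 1.0}))

	// GPU is the dominant resource: max(100/1000, 256/4096, 4/8) = 0.5.
	fmt.Println(dominantResourceFairnessCost(q, capacity, []string{"cpu", "memory", "nvidia.com/gpu"}))
}

With the defaults added in config/armada/config.yaml and config/scheduler/config.yaml, asset fairness effectively counts only CPU (resourceScarcity weights only cpu: 1.0), while dominant resource fairness also reacts to memory and nvidia.com/gpu pressure via dominantResourceFairnessResourcesToConsider.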