-
Notifications
You must be signed in to change notification settings - Fork 134
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add dominant resource fairness #2614
Merged
Merged
Changes from 7 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
506fd09
Move fair share comp. into context
severinson 276f462
Avoid evicting queues below their fair share
severinson dc3e9fb
Add tests
severinson 1c75141
Comment
severinson 3d1cd09
Comment
severinson 5e6c713
Initial commit
severinson f0422d3
Cleanup
severinson 02d0aa6
Add missing config
severinson 3541093
Comments
severinson 0b80222
Merge branch 'master' of https://github.com/armadaproject/armada into…
severinson 41c5f93
Cleanup
severinson 4254943
Consider queues with queued/running jobs for fairness
severinson File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ import ( | |
"github.com/pkg/errors" | ||
"golang.org/x/exp/maps" | ||
"golang.org/x/exp/slices" | ||
"k8s.io/apimachinery/pkg/api/resource" | ||
|
||
"github.com/armadaproject/armada/internal/armada/configuration" | ||
"github.com/armadaproject/armada/internal/common/armadaerrors" | ||
|
@@ -34,6 +35,11 @@ type SchedulingContext struct { | |
PriorityClasses map[string]configuration.PriorityClass | ||
// Default priority class. | ||
DefaultPriorityClass string | ||
// Determines how fairness is computed. | ||
FairnessType configuration.FairnessType | ||
// Used to convert one resource into another when computing fair share. | ||
// Only applies to DominantResourceFairness. | ||
FairnessResourceMappingBySourceResource map[string]configuration.ResourceMapping | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't seem to be used yet. Also, it's not clear to me why resource mapping would only apply to |
||
// Weights used when computing total resource usage. | ||
ResourceScarcity map[string]float64 | ||
// Per-queue scheduling contexts. | ||
|
@@ -77,6 +83,7 @@ func NewSchedulingContext( | |
Pool: pool, | ||
PriorityClasses: priorityClasses, | ||
DefaultPriorityClass: defaultPriorityClass, | ||
FairnessType: configuration.AssertFairness, | ||
zuqq marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ResourceScarcity: resourceScarcity, | ||
QueueSchedulingContexts: make(map[string]*QueueSchedulingContext), | ||
TotalResources: totalResources.DeepCopy(), | ||
|
@@ -106,7 +113,7 @@ func (sctx *SchedulingContext) ClearUnfeasibleSchedulingKeys() { | |
sctx.UnfeasibleSchedulingKeys = make(map[schedulerobjects.SchedulingKey]*JobSchedulingContext) | ||
} | ||
|
||
func (sctx *SchedulingContext) AddQueueSchedulingContext(queue string, priorityFactor float64, initialAllocatedByPriorityClass schedulerobjects.QuantityByTAndResourceType[string]) error { | ||
func (sctx *SchedulingContext) AddQueueSchedulingContext(queue string, weight float64, initialAllocatedByPriorityClass schedulerobjects.QuantityByTAndResourceType[string]) error { | ||
if _, ok := sctx.QueueSchedulingContexts[queue]; ok { | ||
return errors.WithStack(&armadaerrors.ErrInvalidArgument{ | ||
Name: "queue", | ||
|
@@ -128,7 +135,7 @@ func (sctx *SchedulingContext) AddQueueSchedulingContext(queue string, priorityF | |
Created: time.Now(), | ||
ExecutorId: sctx.ExecutorId, | ||
Queue: queue, | ||
PriorityFactor: priorityFactor, | ||
Weight: weight, | ||
Allocated: allocated, | ||
AllocatedByPriorityClass: initialAllocatedByPriorityClass, | ||
ScheduledResourcesByPriorityClass: make(schedulerobjects.QuantityByTAndResourceType[string]), | ||
|
@@ -145,6 +152,21 @@ func (sctx *SchedulingContext) String() string { | |
return sctx.ReportString(0) | ||
} | ||
|
||
// TotalCostAndWeight returns the sum of the costs and weights across all queues. | ||
// Only queues with non-zero cost contribute towards the total weight. | ||
func (sctx *SchedulingContext) TotalCostAndWeight() (float64, float64) { | ||
var cost float64 | ||
var weight float64 | ||
for _, qctx := range sctx.QueueSchedulingContexts { | ||
queueCost := qctx.TotalCostForQueue() | ||
if queueCost != 0 { | ||
cost += queueCost | ||
weight += qctx.Weight | ||
} | ||
} | ||
return cost, weight | ||
} | ||
|
||
func (sctx *SchedulingContext) ReportString(verbosity int32) string { | ||
var sb strings.Builder | ||
w := tabwriter.NewWriter(&sb, 1, 1, 1, ' ', 0) | ||
|
@@ -313,8 +335,8 @@ type QueueSchedulingContext struct { | |
ExecutorId string | ||
// Queue name. | ||
Queue string | ||
// These factors influence the fraction of resources assigned to each queue. | ||
PriorityFactor float64 | ||
// Determines the fair share of this queue relative to other queues. | ||
Weight float64 | ||
// Total resources assigned to the queue across all clusters by priority class priority. | ||
// Includes jobs scheduled during this invocation of the scheduler. | ||
Allocated schedulerobjects.ResourceList | ||
|
@@ -490,6 +512,44 @@ func (qctx *QueueSchedulingContext) ClearJobSpecs() { | |
} | ||
} | ||
|
||
// TotalCostForQueueWithAllocation returns the cost for which this queue should be penalised when computing fairness, | ||
// if the total allocation of this queue is given by allocated. | ||
zuqq marked this conversation as resolved.
Show resolved
Hide resolved
|
||
func (qctx *QueueSchedulingContext) TotalCostForQueue() float64 { | ||
return qctx.TotalCostForQueueWithAllocation(qctx.Allocated) | ||
} | ||
|
||
// TotalCostForQueueWithAllocation returns the cost for which this queue should be penalised when computing fairness, | ||
// if the total allocation of this queue is given by allocated. | ||
zuqq marked this conversation as resolved.
Show resolved
Hide resolved
|
||
func (qctx *QueueSchedulingContext) TotalCostForQueueWithAllocation(allocated schedulerobjects.ResourceList) float64 { | ||
switch qctx.SchedulingContext.FairnessType { | ||
case configuration.AssertFairness: | ||
zuqq marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return qctx.assetFairnessCostWithAllocation(allocated) | ||
case configuration.DominantResourceFairness: | ||
return qctx.dominantResourceFairnessCostWithAllocation(allocated) | ||
default: | ||
panic(fmt.Sprintf("unknown fairness type: %s", qctx.SchedulingContext.FairnessType)) | ||
} | ||
} | ||
|
||
func (qctx *QueueSchedulingContext) assetFairnessCostWithAllocation(allocated schedulerobjects.ResourceList) float64 { | ||
return float64(allocated.AsWeightedMillis(qctx.SchedulingContext.ResourceScarcity)) / qctx.Weight | ||
} | ||
|
||
func (qctx *QueueSchedulingContext) dominantResourceFairnessCostWithAllocation(allocated schedulerobjects.ResourceList) float64 { | ||
var cost float64 | ||
for t, q := range allocated.Resources { | ||
totalq := qctx.SchedulingContext.TotalResources.Get(t) | ||
if totalq.Cmp(resource.Quantity{}) == 0 { | ||
totalq.SetMilli(1) | ||
} | ||
tcost := float64(q.MilliValue()) / float64(totalq.MilliValue()) | ||
if tcost > cost { | ||
cost = tcost | ||
} | ||
} | ||
return cost / qctx.Weight | ||
} | ||
|
||
type GangSchedulingContext struct { | ||
Created time.Time | ||
Queue string | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This list of possible values is bound to become out of date.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Once DRF is used everywhere, I want to remove asset fairness and leave DRF as the only option.