Skip to content

Commit

Permalink
Merge branch 'master' into feat/release-ci-fix
Browse files Browse the repository at this point in the history
  • Loading branch information
dejanzele authored Jun 28, 2023
2 parents 9a31a86 + 6cc7e56 commit c70d4da
Show file tree
Hide file tree
Showing 78 changed files with 3,213 additions and 2,213 deletions.
2 changes: 1 addition & 1 deletion client/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ license = { text = "Apache Software License" }
authors = [{ name = "G-Research Open Source Software", email = "armada@armadaproject.io" }]

[project.optional-dependencies]
format = ["black==23.3.0", "flake8==6.0.0", "pylint==2.17.3"]
format = ["black==23.3.0", "flake8==6.0.0", "pylint==2.17.4"]
docs = ["sphinx", "sphinx-jekyll-builder", "sphinx-toolbox==3.2.0b1"]
test = ["pytest==7.3.1", "coverage>=6.5.0", "pytest-asyncio==0.21.0"]

Expand Down
10 changes: 1 addition & 9 deletions cmd/armada-load-tester/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,7 @@ var rootCmd = &cobra.Command{
Command line utility to submit many jobs to armada
Persistent config can be saved in a config file so it doesn't have to be specified every command.
Example structure:
armadaUrl: localhost:50051
basicAuth:
username: user1
password: password123
The location of this file can be passed in using --config argument or picked from $HOME/.armadactl.yaml.
`,
The location of this file can be passed in using --config argument or picked from $HOME/.armadactl.yaml.`,
}

// Execute adds all child commands to the root command and sets flags appropriately.
Expand Down
7 changes: 0 additions & 7 deletions cmd/testsuite/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,6 @@ func RootCmd() *cobra.Command {
Long: `testsuite is a suite of automated tests for Armada deployments.
Persistent config can be saved in a config file so it doesn't have to be specified every command.
Example structure:
armadaUrl: localhost:50051
basicAuth:
username: user1
password: password123
The location of this file can be passed in using the --config argument.
If not provided, $HOME/.armadactl.yaml is used.`,
}
Expand Down
21 changes: 11 additions & 10 deletions config/armada/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,17 @@ eventsApiRedis:
poolSize: 1000
scheduling:
enableAssertions: true
fairnessModel: "AssetFairness"
dominantResourceFairnessResourcesToConsider:
- "cpu"
- "memory"
- "nvidia.com/gpu"
resourceScarcity:
cpu: 1.0
preemption:
nodeEvictionProbability: 1.0
nodeOversubscriptionEvictionProbability: 1.0
protectedFractionOfFairShare: 1.0
setNodeIdSelector: true
nodeIdLabel: kubernetes.io/hostname
setNodeName: false
Expand All @@ -42,8 +50,8 @@ scheduling:
priority: 1000
preemptible: false
maximumResourceFractionPerQueue:
memory: 0.99
cpu: 0.99
memory: 1.0
cpu: 1.0
armada-preemptible:
priority: 1000
preemptible: true
Expand All @@ -53,7 +61,7 @@ scheduling:
maxExtraNodesToConsider: 1
maximumResourceFractionToSchedule:
memory: 1.0
cpu: 1.0
cpu: 1.0
maxJobSchedulingContextsPerExecutor: 10000
lease:
expireAfter: 15m
Expand All @@ -68,11 +76,6 @@ scheduling:
value: "true"
effect: "NoSchedule"
defaultJobTolerationsByPriorityClass:
"":
- key: "armadaproject.io/pc-armada-default"
operator: "Equal"
value: "true"
effect: "NoSchedule"
armada-default:
- key: "armadaproject.io/pc-armada-default"
operator: "Equal"
Expand All @@ -84,8 +87,6 @@ scheduling:
value: "true"
effect: "NoSchedule"
maxRetries: 5
resourceScarcity:
cpu: 1.0
maxPodSpecSizeBytes: 65535
minJobResources:
memory: 1Mi
Expand Down
3 changes: 3 additions & 0 deletions config/executor/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ kubernetes:
fatalPodSubmissionErrors:
- "admission webhook"
- "namespaces \".*\" not found"
stateChecks:
deadlineForSubmittedPodConsideredMissing: 15m
deadlineForActivePodConsideredMissing: 5m
pendingPodChecks:
deadlineForUpdates: 10m
deadlineForNodeAssignment: 5m
Expand Down
24 changes: 13 additions & 11 deletions config/scheduler/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ grpc:
scheduling:
executorTimeout: 10m
enableAssertions: true
fairnessModel: "AssetFairness"
dominantResourceFairnessResourcesToConsider:
- "cpu"
- "memory"
- "nvidia.com/gpu"
resourceScarcity:
cpu: 1.0
preemption:
alwaysAttemptScheduling: false
enabled: true
Expand All @@ -60,8 +67,8 @@ scheduling:
priority: 1000
preemptible: false
maximumResourceFractionPerQueue:
memory: 0.99
cpu: 0.99
memory: 1.0
cpu: 1.0
armada-preemptible:
priority: 1000
preemptible: true
Expand All @@ -85,11 +92,6 @@ scheduling:
value: "true"
effect: "NoSchedule"
defaultJobTolerationsByPriorityClass:
"":
- key: "armadaproject.io/pc-armada-default"
operator: "Equal"
value: "true"
effect: "NoSchedule"
armada-default:
- key: "armadaproject.io/pc-armada-default"
operator: "Equal"
Expand All @@ -101,11 +103,11 @@ scheduling:
value: "true"
effect: "NoSchedule"
maxRetries: 5
resourceScarcity:
cpu: 1.0
indexedResources:
- cpu
- memory
- name: "cpu"
resolution: "100m"
- name: "memory"
resolution: "1Mi"
gangIdAnnotation: armadaproject.io/gangId
gangCardinalityAnnotation: armadaproject.io/gangCardinality

31 changes: 25 additions & 6 deletions internal/armada/configuration/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,11 @@ type SchedulingConfig struct {
DefaultJobTolerationsByResourceRequest map[string][]v1.Toleration
// Maximum number of times a job is retried before considered failed.
MaxRetries uint
// Weights used when computing fair share.
// Controls how fairness is calculated. Can be either AssetFairness or DominantResourceFairness.
FairnessModel FairnessModel
// List of resource names, e.g., []string{"cpu", "memory"}, to consider when computing DominantResourceFairness.
DominantResourceFairnessResourcesToConsider []string
// Weights used to compute fair share when using AssetFairness.
// Overrides dynamic scarcity calculation if provided.
// Applies to both the new and old scheduler.
ResourceScarcity map[string]float64
Expand Down Expand Up @@ -187,6 +191,20 @@ type SchedulingConfig struct {
AlwaysAttemptScheduling bool
}

// FairnessModel controls how fairness is computed.
// More specifically, each queue has a cost associated with it and the next job to schedule
// is taken from the queue with the smallest cost. FairnessModel determines how that cost is computed.
type FairnessModel string

const (
// AssetFairness sets the cost associated with a queue to a linear combination of its total allocation.
// E.g., w_CPU * "CPU allocation" + w_memory * "memory allocation".
AssetFairness FairnessModel = "AssetFairness"
// DominantResourceFairness sets the cost associated with a queue to
// max("CPU allocation" / "CPU capacity", "memory allocation" / "memory capacity", ...).
DominantResourceFairness FairnessModel = "DominantResourceFairness"
)

type IndexedResource struct {
// Resource name. E.g., "cpu", "memory", or "nvidia.com/gpu".
Name string
Expand All @@ -209,6 +227,8 @@ type PreemptionConfig struct {
// the probability of evicting jobs on oversubscribed nodes, i.e.,
// nodes on which the total resource requests are greater than the available resources.
NodeOversubscriptionEvictionProbability float64
// Only queues allocated more than this fraction of their fair share are considered for preemption.
ProtectedFractionOfFairShare float64
// If true, the Armada scheduler will add to scheduled pods a node selector
// NodeIdLabel: <value of label on node selected by scheduler>.
// If true, NodeIdLabel must be non-empty.
Expand All @@ -233,13 +253,12 @@ type PriorityClass struct {
Priority int32
// If true, Armada may preempt jobs of this class to improve fairness.
Preemptible bool
// Limits resources assigned to jobs of priority equal to or lower than that of this priority class.
// Limits resources assigned to jobs of this priority class.
// Specifically, jobs of this priority class are only scheduled if doing so does not exceed this limit.
//
// For example, if priority is 10 and MaximumResourceFractionPerQueue is map[string]float64{"cpu": 0.3},
// jobs of this priority class are not scheduled if doing so would cause the total resources assigned
// to jobs of priority 10 or lower from the same queue to exceed 30% of the total.
MaximumResourceFractionPerQueue map[string]float64
// Per-pool override of MaximumResourceFractionPerQueue.
// If missing for a particular pool, MaximumResourceFractionPerQueue is used instead for that pool.
MaximumResourceFractionPerQueueByPool map[string]map[string]float64
}

func (p PreemptionConfig) PriorityByPriorityClassName() map[string]int32 {
Expand Down
Loading

0 comments on commit c70d4da

Please sign in to comment.