Merge branch 'master' into feat/release-ci-fix

armadaproject · Jun 28, 2023 · c70d4da · c70d4da
2 parents 9a31a86 + 6cc7e56
commit c70d4da
Show file tree

Hide file tree

Showing 78 changed files with 3,213 additions and 2,213 deletions.
diff --git a/client/python/pyproject.toml b/client/python/pyproject.toml
@@ -9,7 +9,7 @@ license = { text = "Apache Software License" }
 authors = [{ name = "G-Research Open Source Software", email = "armada@armadaproject.io" }]
 
 [project.optional-dependencies]
-format = ["black==23.3.0", "flake8==6.0.0", "pylint==2.17.3"]
+format = ["black==23.3.0", "flake8==6.0.0", "pylint==2.17.4"]
 docs = ["sphinx", "sphinx-jekyll-builder", "sphinx-toolbox==3.2.0b1"]
 test = ["pytest==7.3.1", "coverage>=6.5.0", "pytest-asyncio==0.21.0"]
 

diff --git a/cmd/armada-load-tester/cmd/root.go b/cmd/armada-load-tester/cmd/root.go
@@ -21,15 +21,7 @@ var rootCmd = &cobra.Command{
 Command line utility to submit many jobs to armada
 
 Persistent config can be saved in a config file so it doesn't have to be specified every command.
-
-Example structure:
-armadaUrl: localhost:50051
-basicAuth:
-  username: user1
-  password: password123
-
-The location of this file can be passed in using --config argument or picked from $HOME/.armadactl.yaml.
-`,
+The location of this file can be passed in using --config argument or picked from $HOME/.armadactl.yaml.`,
 }
 
 // Execute adds all child commands to the root command and sets flags appropriately.

diff --git a/cmd/testsuite/cmd/root.go b/cmd/testsuite/cmd/root.go
@@ -18,13 +18,6 @@ func RootCmd() *cobra.Command {
 		Long: `testsuite is a suite of automated tests for Armada deployments.
 
 Persistent config can be saved in a config file so it doesn't have to be specified every command.
-
-Example structure:
-armadaUrl: localhost:50051
-basicAuth:
-username: user1
-password: password123
-
 The location of this file can be passed in using the --config argument.
 If not provided, $HOME/.armadactl.yaml is used.`,
 	}

diff --git a/config/armada/config.yaml b/config/armada/config.yaml
@@ -31,9 +31,17 @@ eventsApiRedis:
   poolSize: 1000
 scheduling:
   enableAssertions: true
+  fairnessModel: "AssetFairness"
+  dominantResourceFairnessResourcesToConsider:
+    - "cpu"
+    - "memory"
+    - "nvidia.com/gpu"
+  resourceScarcity:
+    cpu: 1.0
   preemption:
     nodeEvictionProbability: 1.0
     nodeOversubscriptionEvictionProbability: 1.0
+    protectedFractionOfFairShare: 1.0
     setNodeIdSelector: true
     nodeIdLabel: kubernetes.io/hostname
     setNodeName: false
@@ -42,8 +50,8 @@ scheduling:
         priority: 1000
         preemptible: false
         maximumResourceFractionPerQueue:
-          memory: 0.99
-          cpu: 0.99
+          memory: 1.0
+          cpu: 1.0
       armada-preemptible:
         priority: 1000
         preemptible: true
@@ -53,7 +61,7 @@ scheduling:
   maxExtraNodesToConsider: 1
   maximumResourceFractionToSchedule:
     memory: 1.0
-    cpu: 1.0    
+    cpu: 1.0
   maxJobSchedulingContextsPerExecutor: 10000
   lease:
     expireAfter: 15m
@@ -68,11 +76,6 @@ scheduling:
       value: "true"
       effect: "NoSchedule"
   defaultJobTolerationsByPriorityClass:
-    "":
-      - key: "armadaproject.io/pc-armada-default"
-        operator: "Equal"
-        value: "true"
-        effect: "NoSchedule"
     armada-default:
       - key: "armadaproject.io/pc-armada-default"
         operator: "Equal"
@@ -84,8 +87,6 @@ scheduling:
         value: "true"
         effect: "NoSchedule"
   maxRetries: 5
-  resourceScarcity:
-    cpu: 1.0
   maxPodSpecSizeBytes: 65535
   minJobResources:
     memory: 1Mi

diff --git a/config/executor/config.yaml b/config/executor/config.yaml
@@ -59,6 +59,9 @@ kubernetes:
   fatalPodSubmissionErrors:
     - "admission webhook"
     - "namespaces \".*\" not found"
+  stateChecks:
+    deadlineForSubmittedPodConsideredMissing: 15m
+    deadlineForActivePodConsideredMissing: 5m
   pendingPodChecks:
     deadlineForUpdates: 10m
     deadlineForNodeAssignment: 5m

diff --git a/config/scheduler/config.yaml b/config/scheduler/config.yaml
@@ -49,6 +49,13 @@ grpc:
 scheduling:
   executorTimeout: 10m
   enableAssertions: true
+  fairnessModel: "AssetFairness"
+  dominantResourceFairnessResourcesToConsider:
+    - "cpu"
+    - "memory"
+    - "nvidia.com/gpu"
+  resourceScarcity:
+    cpu: 1.0
   preemption:
     alwaysAttemptScheduling: false
     enabled: true
@@ -60,8 +67,8 @@ scheduling:
         priority: 1000
         preemptible: false
         maximumResourceFractionPerQueue:
-          memory: 0.99
-          cpu: 0.99
+          memory: 1.0
+          cpu: 1.0
       armada-preemptible:
         priority: 1000
         preemptible: true
@@ -85,11 +92,6 @@ scheduling:
       value: "true"
       effect: "NoSchedule"
   defaultJobTolerationsByPriorityClass:
-    "":
-      - key: "armadaproject.io/pc-armada-default"
-        operator: "Equal"
-        value: "true"
-        effect: "NoSchedule"
     armada-default:
       - key: "armadaproject.io/pc-armada-default"
         operator: "Equal"
@@ -101,11 +103,11 @@ scheduling:
         value: "true"
         effect: "NoSchedule"
   maxRetries: 5
-  resourceScarcity:
-    cpu: 1.0
   indexedResources:
-    - cpu
-    - memory
+    - name: "cpu"
+      resolution: "100m"
+    - name: "memory"
+      resolution: "1Mi"
   gangIdAnnotation: armadaproject.io/gangId
   gangCardinalityAnnotation: armadaproject.io/gangCardinality
 
diff --git a/internal/armada/configuration/types.go b/internal/armada/configuration/types.go
@@ -113,7 +113,11 @@ type SchedulingConfig struct {
 	DefaultJobTolerationsByResourceRequest map[string][]v1.Toleration
 	// Maximum number of times a job is retried before considered failed.
 	MaxRetries uint
-	// Weights used when computing fair share.
+	// Controls how fairness is calculated. Can be either AssetFairness or DominantResourceFairness.
+	FairnessModel FairnessModel
+	// List of resource names, e.g., []string{"cpu", "memory"}, to consider when computing DominantResourceFairness.
+	DominantResourceFairnessResourcesToConsider []string
+	// Weights used to compute fair share when using AssetFairness.
 	// Overrides dynamic scarcity calculation if provided.
 	// Applies to both the new and old scheduler.
 	ResourceScarcity map[string]float64
@@ -187,6 +191,20 @@ type SchedulingConfig struct {
 	AlwaysAttemptScheduling bool
 }
 
+// FairnessModel controls how fairness is computed.
+// More specifically, each queue has a cost associated with it and the next job to schedule
+// is taken from the queue with smallest cost. FairnessModel determines how that cost is computed.
+type FairnessModel string
+
+const (
+	// AssetFairness sets the cost associated with a queue to a linear combination of its total allocation.
+	// E.g., w_CPU * "CPU allocation" + w_memory * "memory allocation".
+	AssetFairness FairnessModel = "AssetFairness"
+	// DominantResourceFairness sets the cost associated with a queue to
+	// max("CPU allocation" / "CPU capacity", "memory allocation" / "memory capacity", ...).
+	DominantResourceFairness FairnessModel = "DominantResourceFairness"
+)
+
 type IndexedResource struct {
 	// Resource name. E.g., "cpu", "memory", or "nvidia.com/gpu".
 	Name string
@@ -209,6 +227,8 @@ type PreemptionConfig struct {
 	// the probability of evicting jobs on oversubscribed nodes, i.e.,
 	// nodes on which the total resource requests are greater than the available resources.
 	NodeOversubscriptionEvictionProbability float64
+	// Only queues allocated more than this fraction of their fair share are considered for preemption.
+	ProtectedFractionOfFairShare float64
 	// If true, the Armada scheduler will add to scheduled pods a node selector
 	// NodeIdLabel: <value of label on node selected by scheduler>.
 	// If true, NodeIdLabel must be non-empty.
@@ -233,13 +253,12 @@ type PriorityClass struct {
 	Priority int32
 	// If true, Armada may preempt jobs of this class to improve fairness.
 	Preemptible bool
-	// Limits resources assigned to jobs of priority equal to or lower than that of this priority class.
+	// Limits resources assigned to jobs of this priority class.
 	// Specifically, jobs of this priority class are only scheduled if doing so does not exceed this limit.
-	//
-	// For example, if priority is 10 and MaximumResourceFractionPerQueue is map[string]float64{"cpu": 0.3},
-	// jobs of this priority class are not scheduled if doing so would cause the total resources assigned
-	// to jobs of priority 10 or lower from the same queue to exceed 30% of the total.
 	MaximumResourceFractionPerQueue map[string]float64
+	// Per-pool override of MaximumResourceFractionPerQueue.
+	// If missing for a particular pool, MaximumResourceFractionPerQueue is used instead for that pool.
+	MaximumResourceFractionPerQueueByPool map[string]map[string]float64
 }
 
 func (p PreemptionConfig) PriorityByPriorityClassName() map[string]int32 {