Refactor eval step, testing, bug fixes, add config (#8)

New configuration options can be set in the YAML config. Can now configure tolerance as a configuration option. Can now configure cpuInitializationPeriod as a configuration option. Can now configure initialReadinessDelay as a configuration option. Add unit tests covering evaluation refactor.
jthomperoo · Dec 10, 2019 · 50df868 · 50df868
1 parent 520c5b5
commit 50df868
Show file tree

Hide file tree

Showing 21 changed files with 2,256 additions and 248 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
+### Added
+- New configuration options can be set in the YAML config.
+    - Can now configure `tolerance` as a configuration option - works the same as the `--horizontal-pod-autoscaler-tolerance` flag, [see here](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/). Default value of 0.1.
+    - Can now configure `cpuInitializationPeriod` as a configuration option - works the same as the `--horizontal-pod-autoscaler-cpu-initialization-period` flag, [see here](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/). Time set in minutes, default 5 minutes.
+    - Can now configure `initialReadinessDelay` as a configuration option - works the same as the `--horizontal-pod-autoscaler-initial-readiness-delay` flag, [see here](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/). Time set in seconds, default 30 seconds.
+### Fixed
+- Issues with evaluation decision making looking in the wrong specs for target values.
 
 ## [v0.1.0] - 2019-12-08
 ### Added

diff --git a/cmd/horizontal-pod-autoscaler/main.go b/cmd/horizontal-pod-autoscaler/main.go
@@ -32,6 +32,7 @@ import (
 	"io/ioutil"
 	"log"
 	"os"
+	"strconv"
 	"strings"
 	"time"
 
@@ -52,6 +53,12 @@ import (
 	externalclient "k8s.io/metrics/pkg/client/external_metrics"
 )
 
+// Default values used when the matching configuration variable is not set.
+const (
+	// defaultTolerance is the tolerance handed to the evaluator when "tolerance" is not set.
+	defaultTolerance float64 = 0.1
+	// defaultCPUInitializationPeriod is the CPU initialization period, in minutes, used when
+	// "cpuInitializationPeriod" is not set.
+	defaultCPUInitializationPeriod = 5
+	// defaultInitialReadinessDelay is the initial readiness delay, in seconds, used when
+	// "initialReadinessDelay" is not set.
+	defaultInitialReadinessDelay = 30
+)
+
 func main() {
 	stdin, err := ioutil.ReadAll(os.Stdin)
 	if err != nil {
@@ -100,6 +107,36 @@ func getMetrics(stdin io.Reader) {
 		os.Exit(1)
 	}
 
+	// Get initial readiness delay (in seconds) from the "initialReadinessDelay" environment
+	// variable, falling back to the default when the variable is not set
+	var initialReadinessDelay int64
+	initialReadinessDelayValue, exists := os.LookupEnv("initialReadinessDelay")
+	if !exists {
+		// use default
+		initialReadinessDelay = defaultInitialReadinessDelay
+	} else {
+		// try to parse provided value as a base-10 integer
+		initialReadinessDelay, err = strconv.ParseInt(initialReadinessDelayValue, 10, 64)
+		if err != nil {
+			// %v prints the error text; the float verb %e would be rendered as "%!e(...)".
+			// log.Fatalf exits with status 1 itself, so no explicit os.Exit is needed after it.
+			log.Fatalf("Invalid initial readiness delay provided - %v\n", err)
+		}
+	}
+
+	// Get CPU initialization period (in minutes) from the "cpuInitializationPeriod" environment
+	// variable, falling back to the default when the variable is not set
+	var cpuInitializationPeriod int64
+	cpuInitializationPeriodValue, exists := os.LookupEnv("cpuInitializationPeriod")
+	if !exists {
+		// use default
+		cpuInitializationPeriod = defaultCPUInitializationPeriod
+	} else {
+		// try to parse provided value as a base-10 integer
+		cpuInitializationPeriod, err = strconv.ParseInt(cpuInitializationPeriodValue, 10, 64)
+		if err != nil {
+			// %v prints the error text; the float verb %e would be rendered as "%!e(...)".
+			// log.Fatalf exits with status 1 itself, so no explicit os.Exit is needed after it.
+			log.Fatalf("Invalid CPU initialization period provided - %v\n", err)
+		}
+	}
+
 	// Create the in-cluster Kubernetes config
 	clusterConfig, err := rest.InClusterConfig()
 	if err != nil {
@@ -123,7 +160,7 @@ func getMetrics(stdin io.Reader) {
 			customclient.NewAvailableAPIsGetter(clientset.Discovery()),
 		),
 		externalclient.NewForConfigOrDie(clusterConfig),
-	), &podclient.OnDemandPodLister{Clientset: clientset}, 5*time.Minute, 30*time.Second)
+	), &podclient.OnDemandPodLister{Clientset: clientset}, time.Duration(cpuInitializationPeriod)*time.Minute, time.Duration(initialReadinessDelay)*time.Second)
 
 	// Get metrics for deployment
 	metrics, err := gatherer.GetMetrics(&deployment, metricSpecs, deployment.ObjectMeta.Namespace)
@@ -144,7 +181,6 @@ func getMetrics(stdin io.Reader) {
 }
 
 func getEvaluation(stdin io.Reader) {
-
 	var resourceMetrics cpametric.ResourceMetrics
 	err := yaml.NewYAMLOrJSONDecoder(stdin, 10).Decode(&resourceMetrics)
 	if err != nil {
@@ -157,14 +193,29 @@ func getEvaluation(stdin io.Reader) {
 		os.Exit(1)
 	}
 
+	// Get tolerance from the "tolerance" environment variable, falling back to the default
+	// when the variable is not set
+	var tolerance float64
+	toleranceValue, exists := os.LookupEnv("tolerance")
+	if !exists {
+		// use default
+		tolerance = defaultTolerance
+	} else {
+		// try to parse provided value as a 64-bit float
+		tolerance, err = strconv.ParseFloat(toleranceValue, 64)
+		if err != nil {
+			// %v prints the error text; the float verb %e would be rendered as "%!e(...)".
+			// log.Fatalf exits with status 1 itself, so no explicit os.Exit is needed after it.
+			log.Fatalf("Invalid tolerance provided - %v\n", err)
+		}
+	}
+
 	var combinedMetrics []*metric.Metric
 	err = yaml.NewYAMLOrJSONDecoder(strings.NewReader(resourceMetrics.Metrics[0].Value), 10).Decode(&combinedMetrics)
 	if err != nil {
 		log.Fatal(err)
 		os.Exit(1)
 	}
 
-	evaluator := evaluate.Evaluator{}
+	evaluator := evaluate.NewEvaluate(tolerance)
 	evaluation, err := evaluator.GetEvaluation(combinedMetrics)
 	if err != nil {
 		log.Fatal(err)

diff --git a/codecov.yml b/codecov.yml
@@ -0,0 +1,31 @@
+codecov:
+  require_ci_to_pass: yes
+
+coverage:
+  precision: 2
+  round: down
+  range: "70...100"
+
+  status:
+    project:
+      default:
+        target: auto
+        threshold: 1%
+    patch:
+      default:
+        target: 95%
+        threshold: 1%
+    changes: no
+
+parsers:
+  gcov:
+    branch_detection:
+      conditional: yes
+      loop: yes
+      method: no
+      macro: no
+
+comment:
+  layout: "reach,diff,flags,tree"
+  behavior: default
+  require_changes: yes
diff --git a/evaluate/calculate/calculate.go b/evaluate/calculate/calculate.go
@@ -0,0 +1,133 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Modifications Copyright 2019 The Custom Pod Autoscaler Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+
+Modified to split up evaluations and metric gathering to work with the
+Custom Pod Autoscaler framework.
+Original source:
+https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/podautoscaler/horizontal.go
+https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/podautoscaler/replica_calculator.go
+*/
+
+package calculate
+
+import (
+	"math"
+
+	"k8s.io/apimachinery/pkg/util/sets"
+	metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
+)
+
+// Calculater describes the replica count calculations used when evaluating metrics.
+type Calculater interface {
+	// GetUsageRatioReplicaCount returns a replica count derived from the current replica count,
+	// a usage ratio and the number of ready pods.
+	GetUsageRatioReplicaCount(currentReplicas int32, usageRatio float64, readyPodCount int64) int32
+
+	// GetPlainMetricReplicaCount returns a replica count derived from per-pod metrics and a target
+	// utilization, given the current replica count, the number of ready pods and the sets of
+	// missing and ignored pods.
+	GetPlainMetricReplicaCount(
+		metrics metricsclient.PodMetricsInfo,
+		currentReplicas int32,
+		targetUtilization int64,
+		readyPodCount int64,
+		missingPods sets.String,
+		ignoredPods sets.String,
+	) int32
+}
+
+// ReplicaCalculate calculates replica counts, using the provided tolerance to decide whether to
+// scale up, scale down or keep the replica count the same
+type ReplicaCalculate struct {
+	// Tolerance is compared against the absolute difference between a usage ratio and 1.0; when
+	// the difference does not exceed it, the calculations may return the current replica count.
+	Tolerance float64
+}
+
+// GetUsageRatioReplicaCount works out a replica count from the current number of replicas, the usage ratio of the
+// metric and the number of ready pods.
+//
+// With no current replicas the result is the usage ratio rounded up, and neither the tolerance nor readyPodCount is
+// consulted. Otherwise the current replica count is returned when the usage ratio is within the tolerance of 1.0,
+// and the usage ratio multiplied by readyPodCount, rounded up, is returned when it is not.
+func (r *ReplicaCalculate) GetUsageRatioReplicaCount(currentReplicas int32, usageRatio float64, readyPodCount int64) int32 {
+	if currentReplicas == 0 {
+		// nothing is running yet: scale to zero or n pods depending on the usage ratio alone
+		return int32(math.Ceil(usageRatio))
+	}
+
+	if deviation := math.Abs(1.0 - usageRatio); deviation <= r.Tolerance {
+		// the change would be too small, keep the current replicas
+		return currentReplicas
+	}
+
+	return int32(math.Ceil(usageRatio * float64(readyPodCount)))
+}
+
+// GetPlainMetricReplicaCount works out a replica count from the per-pod metrics and a target utilization, returning
+// the current replica count whenever the calculated usage ratio stays within the tolerance of 1.0.
+//
+// When there are missing pods, or ignored pods during a scale-up, placeholder entries for those pods are written
+// into the provided metrics map (the map is modified in place) and the usage ratio is calculated a second time. The
+// current replica count is also returned if that second ratio points in the opposite scale direction to the first.
+func (r *ReplicaCalculate) GetPlainMetricReplicaCount(metrics metricsclient.PodMetricsInfo,
+	currentReplicas int32,
+	targetUtilization int64,
+	readyPodCount int64,
+	missingPods,
+	ignoredPods sets.String) int32 {
+
+	// withinTolerance reports whether a ratio is close enough to 1.0 that no scaling should happen
+	withinTolerance := func(ratio float64) bool {
+		return math.Abs(1.0-ratio) <= r.Tolerance
+	}
+
+	// only the ratio is needed here, the second return value is discarded
+	usageRatio, _ := metricsclient.GetMetricUtilizationRatio(metrics, targetUtilization)
+
+	// usageRatio = SUM(pod metrics) / number of pods / targetUtilization
+	// usageRatio = averageUtilization / targetUtilization
+	// usageRatio ~ 1.0 == no scale
+	// usageRatio > 1.0 == scale up
+	// usageRatio < 1.0 == scale down
+	scalingUp := usageRatio > 1.0
+	scalingDown := usageRatio < 1.0
+
+	hasMissing := len(missingPods) > 0
+	rebalanceIgnored := len(ignoredPods) > 0 && scalingUp
+
+	if !hasMissing && !rebalanceIgnored {
+		// no unready or missing pods to account for, so the answer can be given straight away
+		if withinTolerance(usageRatio) {
+			return currentReplicas
+		}
+		return int32(math.Ceil(usageRatio * float64(readyPodCount)))
+	}
+
+	// on a scale-down missing pods count as using 100% of the resource request,
+	// otherwise they count as using 0% of it
+	for podName := range missingPods {
+		if scalingDown {
+			metrics[podName] = metricsclient.PodMetric{Value: targetUtilization}
+		} else {
+			metrics[podName] = metricsclient.PodMetric{Value: 0}
+		}
+	}
+
+	if rebalanceIgnored {
+		// on a scale-up unready pods count as using 0% of the resource request
+		for podName := range ignoredPods {
+			metrics[podName] = metricsclient.PodMetric{Value: 0}
+		}
+	}
+
+	// calculate the utilization again now the placeholder values are in place
+	newUsageRatio, _ := metricsclient.GetMetricUtilizationRatio(metrics, targetUtilization)
+
+	directionFlipped := (scalingDown && newUsageRatio > 1.0) || (scalingUp && newUsageRatio < 1.0)
+	if withinTolerance(newUsageRatio) || directionFlipped {
+		// either the change would be too small, or the new ratio would reverse the scale direction
+		return currentReplicas
+	}
+
+	// the replicas considered are however many pods ended up in the metrics map
+	return int32(math.Ceil(newUsageRatio * float64(len(metrics))))
+}