Skip to content
This repository has been archived by the owner on Dec 18, 2022. It is now read-only.

Commit

Permalink
Refactor eval step, testing, bug fixes, add config (#8)
Browse files Browse the repository at this point in the history
New configuration options can be set in the YAML config.
Can now configure tolerance as a configuration option.
Can now configure cpuInitializationPeriod as a configuration option.
Can now configure initialReadinessDelay as a configuration option.
Add unit tests covering evaluation refactor.
  • Loading branch information
jthomperoo authored Dec 10, 2019
1 parent 520c5b5 commit 50df868
Show file tree
Hide file tree
Showing 21 changed files with 2,256 additions and 248 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Added
- New configuration options can be set in the YAML config.
- Can now configure `tolerance` as a configuration option - works the same as the `--horizontal-pod-autoscaler-tolerance` flag, [see here](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/). Default value of 0.1.
- Can now configure `cpuInitializationPeriod` as a configuration option - works the same as the `--horizontal-pod-autoscaler-cpu-initialization-period` flag, [see here](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/). Time set in minutes, default 5 minutes.
- Can now configure `initialReadinessDelay` as a configuration option - works the same as the `--horizontal-pod-autoscaler-initial-readiness-delay` flag, [see here](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/). Time set in seconds, default 30 seconds.
### Fixed
- Issues with evaluation decision making looking in the wrong specs for target values.

## [v0.1.0] - 2019-12-08
### Added
Expand Down
57 changes: 54 additions & 3 deletions cmd/horizontal-pod-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"io/ioutil"
"log"
"os"
"strconv"
"strings"
"time"

Expand All @@ -52,6 +53,12 @@ import (
externalclient "k8s.io/metrics/pkg/client/external_metrics"
)

// Fallback values used when the matching configuration variable is not set.
const (
	// defaultTolerance is the tolerance handed to the evaluator when no
	// "tolerance" variable is provided.
	defaultTolerance float64 = 0.1
	// defaultCPUInitializationPeriod is expressed in minutes.
	defaultCPUInitializationPeriod = 5
	// defaultInitialReadinessDelay is expressed in seconds.
	defaultInitialReadinessDelay = 30
)

func main() {
stdin, err := ioutil.ReadAll(os.Stdin)
if err != nil {
Expand Down Expand Up @@ -100,6 +107,36 @@ func getMetrics(stdin io.Reader) {
os.Exit(1)
}

// Get initial readiness delay (in seconds); it can be set as a configuration
// variable, read here from the "initialReadinessDelay" environment variable.
// The default is used when the variable is not set.
var initialReadinessDelay int64 = defaultInitialReadinessDelay
initialReadinessDelayValue, exists := os.LookupEnv("initialReadinessDelay")
if exists {
	// try to parse provided value as a base-10 integer
	initialReadinessDelay, err = strconv.ParseInt(initialReadinessDelayValue, 10, 64)
	if err != nil {
		// Use %v for the error: %e is a floating-point verb and would print
		// "%!e(...)" noise instead of the message. log.Fatalf already exits
		// with status 1, so no explicit os.Exit call is needed afterwards.
		log.Fatalf("Invalid initial readiness delay provided - %v\n", err)
	}
}

// Get CPU initialization period (in minutes); it can be set as a configuration
// variable, read here from the "cpuInitializationPeriod" environment variable.
// The default is used when the variable is not set.
var cpuInitializationPeriod int64 = defaultCPUInitializationPeriod
cpuInitializationPeriodValue, exists := os.LookupEnv("cpuInitializationPeriod")
if exists {
	// try to parse provided value as a base-10 integer
	cpuInitializationPeriod, err = strconv.ParseInt(cpuInitializationPeriodValue, 10, 64)
	if err != nil {
		// Use %v for the error: %e is a floating-point verb and would print
		// "%!e(...)" noise instead of the message. log.Fatalf already exits
		// with status 1, so no explicit os.Exit call is needed afterwards.
		log.Fatalf("Invalid CPU initialization period provided - %v\n", err)
	}
}

// Create the in-cluster Kubernetes config
clusterConfig, err := rest.InClusterConfig()
if err != nil {
Expand All @@ -123,7 +160,7 @@ func getMetrics(stdin io.Reader) {
customclient.NewAvailableAPIsGetter(clientset.Discovery()),
),
externalclient.NewForConfigOrDie(clusterConfig),
), &podclient.OnDemandPodLister{Clientset: clientset}, 5*time.Minute, 30*time.Second)
), &podclient.OnDemandPodLister{Clientset: clientset}, time.Duration(cpuInitializationPeriod)*time.Minute, time.Duration(initialReadinessDelay)*time.Second)

// Get metrics for deployment
metrics, err := gatherer.GetMetrics(&deployment, metricSpecs, deployment.ObjectMeta.Namespace)
Expand All @@ -144,7 +181,6 @@ func getMetrics(stdin io.Reader) {
}

func getEvaluation(stdin io.Reader) {

var resourceMetrics cpametric.ResourceMetrics
err := yaml.NewYAMLOrJSONDecoder(stdin, 10).Decode(&resourceMetrics)
if err != nil {
Expand All @@ -157,14 +193,29 @@ func getEvaluation(stdin io.Reader) {
os.Exit(1)
}

// Get tolerance, can be set as a configuration variable
var tolerance float64
toleranceValue, exists := os.LookupEnv("tolerance")
if !exists {
// use default
tolerance = defaultTolerance
} else {
// try to parse provided value
tolerance, err = strconv.ParseFloat(toleranceValue, 64)
if err != nil {
log.Fatalf("Invalid tolerance provided - %e\n", err)
os.Exit(1)
}
}

var combinedMetrics []*metric.Metric
err = yaml.NewYAMLOrJSONDecoder(strings.NewReader(resourceMetrics.Metrics[0].Value), 10).Decode(&combinedMetrics)
if err != nil {
log.Fatal(err)
os.Exit(1)
}

evaluator := evaluate.Evaluator{}
evaluator := evaluate.NewEvaluate(tolerance)
evaluation, err := evaluator.GetEvaluation(combinedMetrics)
if err != nil {
log.Fatal(err)
Expand Down
31 changes: 31 additions & 0 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
codecov:
require_ci_to_pass: yes

coverage:
precision: 2
round: down
range: "70...100"

status:
project:
default:
target: auto
threshold: 1%
patch:
default:
target: 95%
threshold: 1%
changes: no

parsers:
gcov:
branch_detection:
conditional: yes
loop: yes
method: no
macro: no

comment:
layout: "reach,diff,flags,tree"
behavior: default
require_changes: yes
133 changes: 133 additions & 0 deletions evaluate/calculate/calculate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Modifications Copyright 2019 The Custom Pod Autoscaler Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Modified to split up evaluations and metric gathering to work with the
Custom Pod Autoscaler framework.
Original source:
https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/podautoscaler/horizontal.go
https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/podautoscaler/replica_calculator.go
*/

package calculate

import (
"math"

"k8s.io/apimachinery/pkg/util/sets"
metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
)

// Calculater is used to calculate replica counts
type Calculater interface {
	// GetUsageRatioReplicaCount returns a replica count derived from the
	// current replica count, a usage ratio and the number of ready pods.
	GetUsageRatioReplicaCount(currentReplicas int32, usageRatio float64, readyPodCount int64) int32

	// GetPlainMetricReplicaCount returns a replica count derived from per-pod
	// metrics and a target utilization, given the sets of missing and ignored
	// pods.
	GetPlainMetricReplicaCount(
		metrics metricsclient.PodMetricsInfo,
		currentReplicas int32,
		targetUtilization int64,
		readyPodCount int64,
		missingPods, ignoredPods sets.String,
	) int32
}

// ReplicaCalculate computes replica counts, treating usage ratios that lie
// within Tolerance of 1.0 as "no change".
type ReplicaCalculate struct {
	// Tolerance is the maximum absolute deviation of the usage ratio from 1.0
	// that still results in the current replica count being kept.
	Tolerance float64
}

// GetUsageRatioReplicaCount derives a replica count from a usage ratio.
//
// With zero current replicas the result is the usage ratio rounded up, which
// yields either zero or n pods. Otherwise the current replica count is
// returned when the ratio is within the tolerance of 1.0, and the ratio
// multiplied by the ready pod count, rounded up, is returned when it is not.
func (r *ReplicaCalculate) GetUsageRatioReplicaCount(currentReplicas int32, usageRatio float64, readyPodCount int64) int32 {
	if currentReplicas == 0 {
		// Scale to zero or n pods depending on usageRatio
		return int32(math.Ceil(usageRatio))
	}

	if deviation := math.Abs(1.0 - usageRatio); deviation <= r.Tolerance {
		// the change would be too small, keep the current replicas
		return currentReplicas
	}

	desired := math.Ceil(usageRatio * float64(readyPodCount))
	return int32(desired)
}

// GetPlainMetricReplicaCount derives a replica count from per-pod metrics and
// a target utilization, keeping the current replica count when the resulting
// usage ratio is within the tolerance of 1.0.
//
// When there are missing pods, or ignored pods during a scale-up, placeholder
// values for those pods are written into the metrics map passed in and the
// ratio is recomputed before a decision is made.
func (r *ReplicaCalculate) GetPlainMetricReplicaCount(metrics metricsclient.PodMetricsInfo,
	currentReplicas int32,
	targetUtilization int64,
	readyPodCount int64,
	missingPods,
	ignoredPods sets.String) int32 {

	// usageRatio = SUM(pod metrics) / number of pods / targetUtilization
	// usageRatio = averageUtilization / targetUtilization
	// usageRatio ~ 1.0 == no scale
	// usageRatio > 1.0 == scale up
	// usageRatio < 1.0 == scale down
	usageRatio, _ := metricsclient.GetMetricUtilizationRatio(metrics, targetUtilization)

	hasMissing := len(missingPods) > 0
	rebalanceIgnored := len(ignoredPods) > 0 && usageRatio > 1.0

	if !hasMissing && !rebalanceIgnored {
		// no unready or missing pods to account for, so decide immediately
		if math.Abs(1.0-usageRatio) <= r.Tolerance {
			// the change would be too small, keep the current replicas
			return currentReplicas
		}
		return int32(math.Ceil(usageRatio * float64(readyPodCount)))
	}

	if hasMissing {
		// on a scale-up, treat missing pods as using 0% of the resource request;
		// on a scale-down, treat them as using 100% of it
		placeholder := int64(0)
		if usageRatio < 1.0 {
			placeholder = targetUtilization
		}
		for podName := range missingPods {
			metrics[podName] = metricsclient.PodMetric{Value: placeholder}
		}
	}

	if rebalanceIgnored {
		// on a scale-up, treat unready pods as using 0% of the resource request
		for podName := range ignoredPods {
			metrics[podName] = metricsclient.PodMetric{Value: 0}
		}
	}

	// re-run the utilization calculation with the placeholder values included
	newUsageRatio, _ := metricsclient.GetMetricUtilizationRatio(metrics, targetUtilization)

	withinTolerance := math.Abs(1.0-newUsageRatio) <= r.Tolerance
	directionFlipped := (usageRatio < 1.0 && newUsageRatio > 1.0) ||
		(usageRatio > 1.0 && newUsageRatio < 1.0)
	if withinTolerance || directionFlipped {
		// keep the current replicas if the change would be too small, or if
		// the new usage ratio would cause a change in scale direction
		return currentReplicas
	}

	// the number of replicas considered is however many pods factored into
	// the recalculation
	consideredPods := float64(len(metrics))
	return int32(math.Ceil(newUsageRatio * consideredPods))
}
Loading

0 comments on commit 50df868

Please sign in to comment.