Skip to content

Commit

Permalink
Drain time metric (#1)
Browse files Browse the repository at this point in the history
* add drain metric

* Register metric

---------

Co-authored-by: Garvin <garvinpang@protonmail.com>
  • Loading branch information
garvinp-stripe and GnatorX authored Nov 17, 2023
1 parent e4c0184 commit fa74dc9
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 43 deletions.
50 changes: 7 additions & 43 deletions pkg/controllers/metrics/node/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,12 @@ import (
crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
"github.com/aws/karpenter-core/pkg/apis/v1beta1"
"github.com/aws/karpenter-core/pkg/controllers/state"
"github.com/aws/karpenter-core/pkg/metrics"
"github.com/aws/karpenter-core/pkg/operator/controller"
"github.com/aws/karpenter-core/pkg/utils/resources"
)

// Label names used by the node metric gauges in this package.
const (
// resourceType labels a series with the name of the resource being reported.
resourceType = "resource_type"
// nodeName labels a series with the node's name.
nodeName = "node_name"
// nodeProvisioner labels a series with the value of the node's provisioner-name label.
nodeProvisioner = "provisioner"
// nodePhase labels a series with the node's status phase.
nodePhase = "phase"
)

var (
allocatableGaugeVec = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Expand Down Expand Up @@ -98,16 +89,17 @@ var (
},
nodeLabelNames(),
)
wellKnownLabels = getWellKnownLabels()

wellKnownLabels = GetWellKnownLabels()
)

func nodeLabelNames() []string {
return append(
sets.New(lo.Values(wellKnownLabels)...).UnsortedList(),
resourceType,
nodeName,
nodeProvisioner,
nodePhase,
ResourceType,
metrics.NodeName,
metrics.ProvisionerLabel,
NodePhase,
)
}

Expand Down Expand Up @@ -165,37 +157,9 @@ func buildMetrics(n *state.StateNode) (res []*metrics.StoreMetric) {
res = append(res, &metrics.StoreMetric{
GaugeVec: gaugeVec,
Value: lo.Ternary(resourceName == v1.ResourceCPU, float64(quantity.MilliValue())/float64(1000), float64(quantity.Value())),
Labels: getNodeLabels(n.Node, strings.ReplaceAll(strings.ToLower(string(resourceName)), "-", "_")),
Labels: GetNodeLabels(n.Node, strings.ReplaceAll(strings.ToLower(string(resourceName)), "-", "_")),
})
}
}
return res
}

// getNodeLabels assembles the Prometheus label set for a single node.
//
// The fixed labels (resource type, node name, provisioner and phase) are
// seeded first; every entry of wellKnownLabels is then copied from the node's
// Kubernetes labels under its metric label name. A well-known label the node
// does not carry is emitted with an empty value.
func getNodeLabels(node *v1.Node, resourceTypeName string) prometheus.Labels {
	out := prometheus.Labels{
		resourceType:    resourceTypeName,
		nodeName:        node.Name,
		nodeProvisioner: node.Labels[v1alpha5.ProvisionerNameLabelKey],
		nodePhase:       string(node.Status.Phase),
	}

	// Well-known labels are written last, so they win on a name collision.
	for key, name := range wellKnownLabels {
		out[name] = node.Labels[key]
	}
	return out
}

// getWellKnownLabels maps each well-known Kubernetes label key of the form
// "<prefix>/<name>" to a metric label name derived from "<name>".
//
// Keys that do not split into exactly two "/"-separated parts are skipped.
func getWellKnownLabels() map[string]string {
	names := map[string]string{}
	// TODO @joinnis: Remove v1alpha5 well-known labels in favor of only v1beta1 well-known labels after v1alpha5 is dropped
	for key := range v1alpha5.WellKnownLabels.Union(v1beta1.WellKnownLabels) {
		segments := strings.Split(key, "/")
		if len(segments) != 2 {
			continue
		}
		// Lower-case and swap dashes for underscores so the name follows
		// Prometheus naming conventions (snake_case).
		names[key] = strings.ReplaceAll(strings.ToLower(segments[1]), "-", "_")
	}
	return names
}
64 changes: 64 additions & 0 deletions pkg/controllers/metrics/node/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node

import (
"strings"

"github.com/prometheus/client_golang/prometheus"
v1 "k8s.io/api/core/v1"

"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
"github.com/aws/karpenter-core/pkg/apis/v1beta1"
"github.com/aws/karpenter-core/pkg/metrics"
)

// Metric label names shared by the node gauges and other packages that reuse
// GetNodeLabels.
const (
// ResourceType is the label name under which GetNodeLabels stores the
// resource type name, when one is supplied.
ResourceType = "resource_type"
// NodePhase is the label name under which GetNodeLabels stores the node's
// status phase.
NodePhase = "phase"
)

// GetWellKnownLabels returns a map from each well-known Kubernetes label key
// (the union of the v1alpha5 and v1beta1 sets) to the metric label name
// derived from it.
//
// Only keys containing exactly one "/" are included. The metric name is the
// part after the slash, lower-cased, with "-" replaced by "_".
func GetWellKnownLabels() map[string]string {
	names := map[string]string{}
	// TODO @joinnis: Remove v1alpha5 well-known labels in favor of only v1beta1 well-known labels after v1alpha5 is dropped
	for key := range v1alpha5.WellKnownLabels.Union(v1beta1.WellKnownLabels) {
		if strings.Count(key, "/") != 1 {
			continue
		}
		suffix := key[strings.Index(key, "/")+1:]
		// Reformat to match Prometheus naming conventions (snake_case).
		names[key] = strings.ReplaceAll(strings.ToLower(suffix), "-", "_")
	}
	return names
}

// GetNodeLabels builds the Prometheus label set describing node.
//
// The node name, provisioner and phase labels are always set. The
// ResourceType label is added only when resourceTypeName is non-empty, and the
// nodepool label only when the node carries a non-empty
// v1beta1.NodePoolLabelKey value. Every entry of wellKnownLabels is then
// copied from the node's Kubernetes labels; a label the node lacks is emitted
// with an empty value.
//
// NOTE(review): the returned key set varies with the two optional labels, so
// confirm that each GaugeVec receiving these labels declares matching names.
func GetNodeLabels(node *v1.Node, resourceTypeName string) prometheus.Labels {
	out := prometheus.Labels{
		metrics.NodeName:         node.Name,
		metrics.ProvisionerLabel: node.Labels[v1alpha5.ProvisionerNameLabelKey],
		NodePhase:                string(node.Status.Phase),
	}
	if resourceTypeName != "" {
		out[ResourceType] = resourceTypeName
	}
	if pool := node.Labels[v1beta1.NodePoolLabelKey]; pool != "" {
		out[metrics.NodePoolLabel] = pool
	}

	// Well-known labels are written last, so they win on a name collision.
	for key, name := range wellKnownLabels {
		out[name] = node.Labels[key]
	}
	return out
}
2 changes: 2 additions & 0 deletions pkg/controllers/node/termination/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
"github.com/aws/karpenter-core/pkg/apis/v1beta1"
"github.com/aws/karpenter-core/pkg/cloudprovider"
nodemetrics "github.com/aws/karpenter-core/pkg/controllers/metrics/node"
"github.com/aws/karpenter-core/pkg/controllers/node/termination/terminator"
terminatorevents "github.com/aws/karpenter-core/pkg/controllers/node/termination/terminator/events"
"github.com/aws/karpenter-core/pkg/events"
Expand Down Expand Up @@ -94,6 +95,7 @@ func (c *Controller) Finalize(ctx context.Context, node *v1.Node) (reconcile.Res
}
return reconcile.Result{}, fmt.Errorf("getting machine, %w", err)
}
NodeDrainTime.With(nodemetrics.GetNodeLabels(node, "")).Set(time.Since(node.DeletionTimestamp.Time).Seconds())
return reconcile.Result{RequeueAfter: 1 * time.Second}, nil
}
if err := c.cloudProvider.Delete(ctx, nodeclaimutil.NewFromNode(node)); cloudprovider.IgnoreNodeClaimNotFoundError(err) != nil {
Expand Down
23 changes: 23 additions & 0 deletions pkg/controllers/node/termination/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,13 @@ package termination

import (
"github.com/prometheus/client_golang/prometheus"
"github.com/samber/lo"
"k8s.io/apimachinery/pkg/util/sets"
crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"

"github.com/aws/karpenter-core/pkg/metrics"

nodemetrics "github.com/aws/karpenter-core/pkg/controllers/metrics/node"
)

var (
Expand All @@ -32,8 +36,27 @@ var (
},
[]string{metrics.ProvisionerLabel, metrics.NodePoolLabel},
)
NodeDrainTime = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "karpenter",
Subsystem: "nodes",
Name: "drain_time_seconds",
Help: "The time taken to drain a node.",
},
nodeLabelNames(),
)
)

// nodeLabelNames returns the label names declared for NodeDrainTime: the
// de-duplicated well-known metric label names (in no particular order)
// followed by the node name, provisioner and phase labels.
func nodeLabelNames() []string {
	names := sets.New(lo.Values(nodemetrics.GetWellKnownLabels())...).UnsortedList()
	return append(names, metrics.NodeName, metrics.ProvisionerLabel, nodemetrics.NodePhase)
}

// init registers this package's collectors with the controller-runtime
// metrics registry. MustRegister panics if a registration fails.
func init() {
	crmetrics.Registry.MustRegister(TerminationSummary, NodeDrainTime)
}
1 change: 1 addition & 0 deletions pkg/metrics/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const (

ProvisionerLabel = "provisioner"
NodePoolLabel = "nodepool"
NodeName = "node_name"
ReasonLabel = "reason"
TypeLabel = "type"

Expand Down

0 comments on commit fa74dc9

Please sign in to comment.