diff --git a/pkg/controllers/metrics/node/controller.go b/pkg/controllers/metrics/node/controller.go
index 7161c4115f..bc36c561da 100644
--- a/pkg/controllers/metrics/node/controller.go
+++ b/pkg/controllers/metrics/node/controller.go
@@ -28,21 +28,12 @@ import (
 	crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 
-	"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
-	"github.com/aws/karpenter-core/pkg/apis/v1beta1"
 	"github.com/aws/karpenter-core/pkg/controllers/state"
 	"github.com/aws/karpenter-core/pkg/metrics"
 	"github.com/aws/karpenter-core/pkg/operator/controller"
 	"github.com/aws/karpenter-core/pkg/utils/resources"
 )
 
-const (
-	resourceType    = "resource_type"
-	nodeName        = "node_name"
-	nodeProvisioner = "provisioner"
-	nodePhase       = "phase"
-)
-
 var (
 	allocatableGaugeVec = prometheus.NewGaugeVec(
 		prometheus.GaugeOpts{
@@ -98,16 +89,18 @@ var (
 		},
 		nodeLabelNames(),
 	)
-	wellKnownLabels = getWellKnownLabels()
+
+	wellKnownLabels = GetWellKnownLabels()
 )
 
 func nodeLabelNames() []string {
 	return append(
 		sets.New(lo.Values(wellKnownLabels)...).UnsortedList(),
-		resourceType,
-		nodeName,
-		nodeProvisioner,
-		nodePhase,
+		ResourceType,
+		metrics.NodeName,
+		metrics.ProvisionerLabel,
+		metrics.NodePoolLabel,
+		NodePhase,
 	)
 }
 
@@ -165,37 +158,9 @@ func buildMetrics(n *state.StateNode) (res []*metrics.StoreMetric) {
 			res = append(res, &metrics.StoreMetric{
 				GaugeVec: gaugeVec,
 				Value:    lo.Ternary(resourceName == v1.ResourceCPU, float64(quantity.MilliValue())/float64(1000), float64(quantity.Value())),
-				Labels:   getNodeLabels(n.Node, strings.ReplaceAll(strings.ToLower(string(resourceName)), "-", "_")),
+				Labels:   GetNodeLabels(n.Node, strings.ReplaceAll(strings.ToLower(string(resourceName)), "-", "_")),
 			})
 		}
 	}
 	return res
 }
-
-func getNodeLabels(node *v1.Node, resourceTypeName string) prometheus.Labels {
-	metricLabels := prometheus.Labels{}
-	metricLabels[resourceType] = resourceTypeName
-	metricLabels[nodeName] = node.Name
-	metricLabels[nodeProvisioner] = node.Labels[v1alpha5.ProvisionerNameLabelKey]
-	metricLabels[nodePhase] = string(node.Status.Phase)
-
-	// Populate well known labels
-	for wellKnownLabel, label := range wellKnownLabels {
-		metricLabels[label] = node.Labels[wellKnownLabel]
-	}
-	return metricLabels
-}
-
-func getWellKnownLabels() map[string]string {
-	labels := make(map[string]string)
-	// TODO @joinnis: Remove v1alpha5 well-known labels in favor of only v1beta1 well-known labels after v1alpha5 is dropped
-	for wellKnownLabel := range v1alpha5.WellKnownLabels.Union(v1beta1.WellKnownLabels) {
-		if parts := strings.Split(wellKnownLabel, "/"); len(parts) == 2 {
-			label := parts[1]
-			// Reformat label names to be consistent with Prometheus naming conventions (snake_case)
-			label = strings.ReplaceAll(strings.ToLower(label), "-", "_")
-			labels[wellKnownLabel] = label
-		}
-	}
-	return labels
-}
diff --git a/pkg/controllers/metrics/node/metrics.go b/pkg/controllers/metrics/node/metrics.go
new file mode 100644
index 0000000000..04566a369e
--- /dev/null
+++ b/pkg/controllers/metrics/node/metrics.go
@@ -0,0 +1,75 @@
+/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package node
+
+import (
+	"strings"
+
+	"github.com/prometheus/client_golang/prometheus"
+	v1 "k8s.io/api/core/v1"
+
+	"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
+	"github.com/aws/karpenter-core/pkg/apis/v1beta1"
+	"github.com/aws/karpenter-core/pkg/metrics"
+)
+
+// Metric label names specific to node metrics.
+const (
+	ResourceType = "resource_type"
+	NodePhase    = "phase"
+)
+
+// GetWellKnownLabels maps every well-known node label key of the form
+// "<prefix>/<name>" (from the union of the v1alpha5 and v1beta1 sets) to a
+// metric label name: <name> lower-cased with "-" replaced by "_". Keys that do
+// not split into exactly two parts on "/" are skipped.
+func GetWellKnownLabels() map[string]string {
+	labels := make(map[string]string)
+	// TODO @joinnis: Remove v1alpha5 well-known labels in favor of only v1beta1 well-known labels after v1alpha5 is dropped
+	for wellKnownLabel := range v1alpha5.WellKnownLabels.Union(v1beta1.WellKnownLabels) {
+		if parts := strings.Split(wellKnownLabel, "/"); len(parts) == 2 {
+			label := parts[1]
+			// Reformat label names to be consistent with Prometheus naming conventions (snake_case)
+			label = strings.ReplaceAll(strings.ToLower(label), "-", "_")
+			labels[wellKnownLabel] = label
+		}
+	}
+	return labels
+}
+
+// GetNodeLabels returns the metric label values for node. The resource_type
+// label is only included when resourceTypeName is non-empty; node_name,
+// provisioner, nodepool, phase and every well-known label are always present
+// (empty when the node does not carry the corresponding label), so the key set
+// is fixed for a given resourceTypeName and can match a vector's label names.
+func GetNodeLabels(node *v1.Node, resourceTypeName string) prometheus.Labels {
+	metricLabels := prometheus.Labels{}
+	if resourceTypeName != "" {
+		metricLabels[ResourceType] = resourceTypeName
+	}
+	metricLabels[metrics.NodeName] = node.Name
+	metricLabels[metrics.ProvisionerLabel] = node.Labels[v1alpha5.ProvisionerNameLabelKey]
+	// Set nodepool unconditionally: prometheus rejects a label set whose keys
+	// differ from the vector's declared names, so it cannot depend on whether
+	// the node happens to carry the label.
+	metricLabels[metrics.NodePoolLabel] = node.Labels[v1beta1.NodePoolLabelKey]
+	metricLabels[NodePhase] = string(node.Status.Phase)
+
+	// Populate well known labels
+	for wellKnownLabel, label := range wellKnownLabels {
+		metricLabels[label] = node.Labels[wellKnownLabel]
+	}
+	return metricLabels
+}
diff --git a/pkg/controllers/node/termination/controller.go b/pkg/controllers/node/termination/controller.go
index c61aa95704..b75932736a 100644
--- a/pkg/controllers/node/termination/controller.go
+++ b/pkg/controllers/node/termination/controller.go
@@ -35,6 +35,7 @@ import (
 	"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
 	"github.com/aws/karpenter-core/pkg/apis/v1beta1"
 	"github.com/aws/karpenter-core/pkg/cloudprovider"
+	nodemetrics "github.com/aws/karpenter-core/pkg/controllers/metrics/node"
 	"github.com/aws/karpenter-core/pkg/controllers/node/termination/terminator"
 	terminatorevents "github.com/aws/karpenter-core/pkg/controllers/node/termination/terminator/events"
 	"github.com/aws/karpenter-core/pkg/events"
@@ -94,6 +95,7 @@ func (c *Controller) Finalize(ctx context.Context, node *v1.Node) (reconcile.Res
 			}
 			return reconcile.Result{}, fmt.Errorf("getting machine, %w", err)
 		}
+		NodeDrainTime.With(nodemetrics.GetNodeLabels(node, "")).Set(time.Since(node.DeletionTimestamp.Time).Seconds())
 		return reconcile.Result{RequeueAfter: 1 * time.Second}, nil
 	}
 	if err := c.cloudProvider.Delete(ctx, nodeclaimutil.NewFromNode(node)); cloudprovider.IgnoreNodeClaimNotFoundError(err) != nil {
diff --git a/pkg/controllers/node/termination/metrics.go b/pkg/controllers/node/termination/metrics.go
index 538095265e..4c593f064c 100644
--- a/pkg/controllers/node/termination/metrics.go
+++ b/pkg/controllers/node/termination/metrics.go
@@ -16,9 +16,13 @@ package termination
 
 import (
 	"github.com/prometheus/client_golang/prometheus"
+	"github.com/samber/lo"
+	"k8s.io/apimachinery/pkg/util/sets"
 	crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
 
 	"github.com/aws/karpenter-core/pkg/metrics"
+
+	nodemetrics "github.com/aws/karpenter-core/pkg/controllers/metrics/node"
 )
 
 var (
@@ -32,8 +36,30 @@ var (
 		},
 		[]string{metrics.ProvisionerLabel, metrics.NodePoolLabel},
 	)
+	NodeDrainTime = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Namespace: "karpenter",
+			Subsystem: "nodes",
+			Name:      "drain_time_seconds",
+			Help:      "The time taken to drain a node.",
+		},
+		nodeLabelNames(),
+	)
 )
 
+// nodeLabelNames lists the label names of NodeDrainTime. It must stay in sync
+// with the keys nodemetrics.GetNodeLabels returns for an empty resource type.
+func nodeLabelNames() []string {
+	return append(
+		sets.New(lo.Values(nodemetrics.GetWellKnownLabels())...).UnsortedList(),
+		metrics.NodeName,
+		metrics.ProvisionerLabel,
+		metrics.NodePoolLabel,
+		nodemetrics.NodePhase,
+	)
+}
+
 func init() {
 	crmetrics.Registry.MustRegister(TerminationSummary)
+	crmetrics.Registry.MustRegister(NodeDrainTime)
 }
diff --git a/pkg/metrics/constants.go b/pkg/metrics/constants.go
index 8dbc25dca4..5de1c5e5ab 100644
--- a/pkg/metrics/constants.go
+++ b/pkg/metrics/constants.go
@@ -26,6 +26,7 @@ const (
 
 	ProvisionerLabel = "provisioner"
 	NodePoolLabel    = "nodepool"
+	NodeName         = "node_name"
 
 	ReasonLabel = "reason"
 	TypeLabel   = "type"