Skip to content

Commit

Permalink
chore: address some gaps in k8s monitoring (#6653)
Browse files Browse the repository at this point in the history
  • Loading branch information
srikanthccv authored Dec 19, 2024
1 parent cecc57e commit 77420b9
Show file tree
Hide file tree
Showing 16 changed files with 398 additions and 32 deletions.
20 changes: 17 additions & 3 deletions pkg/query-service/app/inframetrics/namespaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@ var (
}

queryNamesForNamespaces = map[string][]string{
"cpu": {"A"},
"memory": {"D"},
"cpu": {"A"},
"memory": {"D"},
"pod_phase": {"H", "I", "J", "K"},
}
namespaceQueryNames = []string{"A", "D"}
namespaceQueryNames = []string{"A", "D", "H", "I", "J", "K"}

attributesKeysForNamespaces = []v3.AttributeKey{
{Key: "k8s_namespace_name"},
Expand Down Expand Up @@ -307,6 +308,19 @@ func (p *NamespacesRepo) GetNamespaceList(ctx context.Context, req model.Namespa
record.MemoryUsage = memory
}

if pending, ok := row.Data["H"].(float64); ok {
record.CountByPhase.Pending = int(pending)
}
if running, ok := row.Data["I"].(float64); ok {
record.CountByPhase.Running = int(running)
}
if succeeded, ok := row.Data["J"].(float64); ok {
record.CountByPhase.Succeeded = int(succeeded)
}
if failed, ok := row.Data["K"].(float64); ok {
record.CountByPhase.Failed = int(failed)
}

record.Meta = map[string]string{}
if _, ok := namespaceAttrs[record.NamespaceName]; ok {
record.Meta = namespaceAttrs[record.NamespaceName]
Expand Down
13 changes: 11 additions & 2 deletions pkg/query-service/app/inframetrics/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (
var (
metricToUseForNodes = "k8s_node_cpu_utilization"

nodeAttrsToEnrich = []string{"k8s_node_name", "k8s_node_uid"}
nodeAttrsToEnrich = []string{"k8s_node_name", "k8s_node_uid", "k8s_cluster_name"}

k8sNodeUIDAttrKey = "k8s_node_uid"

Expand All @@ -27,13 +27,14 @@ var (
"memory": {"C"},
"memory_allocatable": {"D"},
}
nodeQueryNames = []string{"A", "B", "C", "D"}
nodeQueryNames = []string{"A", "B", "C", "D", "E", "F"}

metricNamesForNodes = map[string]string{
"cpu": "k8s_node_cpu_utilization",
"cpu_allocatable": "k8s_node_allocatable_cpu",
"memory": "k8s_node_memory_usage",
"memory_allocatable": "k8s_node_allocatable_memory",
"node_condition": "k8s_node_condition_ready",
}
)

Expand Down Expand Up @@ -325,6 +326,14 @@ func (p *NodesRepo) GetNodeList(ctx context.Context, req model.NodeListRequest)
record.NodeMemoryAllocatable = memory
}

if ready, ok := row.Data["E"].(float64); ok {
record.CountByCondition.Ready = int(ready)
}

if notReady, ok := row.Data["F"].(float64); ok {
record.CountByCondition.NotReady = int(notReady)
}

record.Meta = map[string]string{}
if _, ok := nodeAttrs[record.NodeUID]; ok {
record.Meta = nodeAttrs[record.NodeUID]
Expand Down
68 changes: 68 additions & 0 deletions pkg/query-service/app/inframetrics/nodes_query.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,74 @@ var NodesTableListQuery = v3.QueryRangeParamsV3{
SpaceAggregation: v3.SpaceAggregationSum,
Disabled: false,
},
// node conditions - Ready
// Query "E" reads the metric named by metricNamesForNodes["node_condition"],
// keeps only data matching the `__value = 1` filter, and groups by node UID.
// NOTE(review): presumably `__value` filters on the sample value itself rather
// than on a label called "__value" — confirm against the metrics query builder.
"E": {
QueryName: "E",
DataSource: v3.DataSourceMetrics,
AggregateAttribute: v3.AttributeKey{
Key: metricNamesForNodes["node_condition"],
DataType: v3.AttributeKeyDataTypeFloat64,
},
Temporality: v3.Unspecified,
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{
{
Key: v3.AttributeKey{
Key: "__value",
},
Operator: v3.FilterOperatorEqual,
// 1 is the value this query treats as "Ready".
Value: 1,
},
},
},
GroupBy: []v3.AttributeKey{
{
Key: k8sNodeUIDAttrKey,
DataType: v3.AttributeKeyDataTypeString,
Type: v3.AttributeKeyTypeResource,
},
},
Expression: "E",
ReduceTo: v3.ReduceToOperatorAvg,
TimeAggregation: v3.TimeAggregationAnyLast,
// If the filter above retains only samples equal to 1, summing them across
// series gives the same number as counting the matching series.
SpaceAggregation: v3.SpaceAggregationSum,
Disabled: false,
},
// node conditions - NotReady
// Query "F" reads the metric named by metricNamesForNodes["node_condition"],
// keeps only data matching the `__value = 0` filter, and groups by node UID.
// Its result is meant to be a count of not-ready series per node.
// NOTE(review): presumably `__value` filters on the sample value itself rather
// than on a label called "__value" — confirm against the metrics query builder.
"F": {
QueryName: "F",
DataSource: v3.DataSourceMetrics,
AggregateAttribute: v3.AttributeKey{
Key: metricNamesForNodes["node_condition"],
DataType: v3.AttributeKeyDataTypeFloat64,
},
Temporality: v3.Unspecified,
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{
{
Key: v3.AttributeKey{
Key: "__value",
},
Operator: v3.FilterOperatorEqual,
// 0 is the value this query treats as "NotReady".
Value: 0,
},
},
},
GroupBy: []v3.AttributeKey{
{
Key: k8sNodeUIDAttrKey,
DataType: v3.AttributeKeyDataTypeString,
Type: v3.AttributeKeyTypeResource,
},
},
Expression: "F",
ReduceTo: v3.ReduceToOperatorAvg,
TimeAggregation: v3.TimeAggregationAnyLast,
// Count, not Sum: every retained sample has value 0, so summing them would
// always produce 0 and the NotReady count could never be non-zero. Counting
// the matching series yields the intended number.
SpaceAggregation: v3.SpaceAggregationCount,
Disabled: false,
},
},
PanelType: v3.PanelTypeTable,
QueryType: v3.QueryTypeBuilder,
Expand Down
21 changes: 20 additions & 1 deletion pkg/query-service/app/inframetrics/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ var (
"k8s_daemonset_name",
"k8s_job_name",
"k8s_cronjob_name",
"k8s_cluster_name",
}

k8sPodUIDAttrKey = "k8s_pod_uid"
Expand All @@ -39,8 +40,9 @@ var (
"memory_request": {"E", "D"},
"memory_limit": {"F", "D"},
"restarts": {"G", "A"},
"pod_phase": {"H", "I", "J", "K"},
}
podQueryNames = []string{"A", "B", "C", "D", "E", "F", "G"}
podQueryNames = []string{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"}

metricNamesForPods = map[string]string{
"cpu": "k8s_pod_cpu_utilization",
Expand All @@ -50,6 +52,7 @@ var (
"memory_request": "k8s_pod_memory_request_utilization",
"memory_limit": "k8s_pod_memory_limit_utilization",
"restarts": "k8s_container_restarts",
"pod_phase": "k8s_pod_phase",
}
)

Expand Down Expand Up @@ -365,6 +368,22 @@ func (p *PodsRepo) GetPodList(ctx context.Context, req model.PodListRequest) (mo
record.RestartCount = int(restarts)
}

if pending, ok := row.Data["H"].(float64); ok {
record.CountByPhase.Pending = int(pending)
}

if running, ok := row.Data["I"].(float64); ok {
record.CountByPhase.Running = int(running)
}

if succeeded, ok := row.Data["J"].(float64); ok {
record.CountByPhase.Succeeded = int(succeeded)
}

if failed, ok := row.Data["K"].(float64); ok {
record.CountByPhase.Failed = int(failed)
}

record.Meta = map[string]string{}
if _, ok := podAttrs[record.PodUID]; ok {
record.Meta = podAttrs[record.PodUID]
Expand Down
144 changes: 140 additions & 4 deletions pkg/query-service/app/inframetrics/pods_query.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ var PodsTableListQuery = v3.QueryRangeParamsV3{
Expression: "B",
ReduceTo: v3.ReduceToOperatorAvg,
TimeAggregation: v3.TimeAggregationAvg,
SpaceAggregation: v3.SpaceAggregationSum,
SpaceAggregation: v3.SpaceAggregationAvg,
Disabled: false,
},
// pod cpu limit utilization
Expand All @@ -80,7 +80,7 @@ var PodsTableListQuery = v3.QueryRangeParamsV3{
Expression: "C",
ReduceTo: v3.ReduceToOperatorAvg,
TimeAggregation: v3.TimeAggregationAvg,
SpaceAggregation: v3.SpaceAggregationSum,
SpaceAggregation: v3.SpaceAggregationAvg,
Disabled: false,
},
// pod memory utilization
Expand Down Expand Up @@ -132,7 +132,7 @@ var PodsTableListQuery = v3.QueryRangeParamsV3{
Expression: "E",
ReduceTo: v3.ReduceToOperatorAvg,
TimeAggregation: v3.TimeAggregationAvg,
SpaceAggregation: v3.SpaceAggregationSum,
SpaceAggregation: v3.SpaceAggregationAvg,
Disabled: false,
},
// pod memory limit utilization
Expand All @@ -158,7 +158,7 @@ var PodsTableListQuery = v3.QueryRangeParamsV3{
Expression: "F",
ReduceTo: v3.ReduceToOperatorAvg,
TimeAggregation: v3.TimeAggregationAvg,
SpaceAggregation: v3.SpaceAggregationSum,
SpaceAggregation: v3.SpaceAggregationAvg,
Disabled: false,
},
"G": {
Expand Down Expand Up @@ -187,6 +187,142 @@ var PodsTableListQuery = v3.QueryRangeParamsV3{
Functions: []v3.Function{{Name: v3.FunctionNameRunningDiff}},
Disabled: false,
},
// pod phase pending
// Query "H" reads the metric named by metricNamesForPods["pod_phase"], keeps
// only data matching the `__value = 1` filter, and counts the matching series
// per pod UID.
// NOTE(review): presumably `__value` filters on the sample value — confirm
// against the metrics query builder.
"H": {
QueryName: "H",
DataSource: v3.DataSourceMetrics,
AggregateAttribute: v3.AttributeKey{
Key: metricNamesForPods["pod_phase"],
DataType: v3.AttributeKeyDataTypeFloat64,
},
Temporality: v3.Unspecified,
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{
{
Key: v3.AttributeKey{
Key: "__value",
},
Operator: v3.FilterOperatorEqual,
// 1 is the phase value this query treats as "pending".
Value: 1,
},
},
},
GroupBy: []v3.AttributeKey{
{
Key: k8sPodUIDAttrKey,
DataType: v3.AttributeKeyDataTypeString,
Type: v3.AttributeKeyTypeResource,
},
},
Expression: "H",
// Last + AnyLast: report the most recent value in the window rather than an
// average over it.
ReduceTo: v3.ReduceToOperatorLast,
TimeAggregation: v3.TimeAggregationAnyLast,
// Count the series that match the filter instead of adding up their values.
SpaceAggregation: v3.SpaceAggregationCount,
Disabled: false,
},
// pod phase running
// Query "I" reads the metric named by metricNamesForPods["pod_phase"], keeps
// only data matching the `__value = 2` filter, and counts the matching series
// per pod UID.
// NOTE(review): presumably `__value` filters on the sample value — confirm
// against the metrics query builder.
"I": {
QueryName: "I",
DataSource: v3.DataSourceMetrics,
AggregateAttribute: v3.AttributeKey{
Key: metricNamesForPods["pod_phase"],
DataType: v3.AttributeKeyDataTypeFloat64,
},
Temporality: v3.Unspecified,
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{
{
Key: v3.AttributeKey{
Key: "__value",
},
Operator: v3.FilterOperatorEqual,
// 2 is the phase value this query treats as "running".
Value: 2,
},
},
},
GroupBy: []v3.AttributeKey{
{
Key: k8sPodUIDAttrKey,
DataType: v3.AttributeKeyDataTypeString,
Type: v3.AttributeKeyTypeResource,
},
},
Expression: "I",
// Last + AnyLast: report the most recent value in the window rather than an
// average over it.
ReduceTo: v3.ReduceToOperatorLast,
TimeAggregation: v3.TimeAggregationAnyLast,
// Count the series that match the filter instead of adding up their values.
SpaceAggregation: v3.SpaceAggregationCount,
Disabled: false,
},
// pod phase succeeded
// Query "J" reads the metric named by metricNamesForPods["pod_phase"], keeps
// only data matching the `__value = 3` filter, and counts the matching series
// per pod UID.
// NOTE(review): presumably `__value` filters on the sample value — confirm
// against the metrics query builder.
"J": {
QueryName: "J",
DataSource: v3.DataSourceMetrics,
AggregateAttribute: v3.AttributeKey{
Key: metricNamesForPods["pod_phase"],
DataType: v3.AttributeKeyDataTypeFloat64,
},
Temporality: v3.Unspecified,
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{
{
Key: v3.AttributeKey{
Key: "__value",
},
Operator: v3.FilterOperatorEqual,
// 3 is the phase value this query treats as "succeeded".
Value: 3,
},
},
},
GroupBy: []v3.AttributeKey{
{
Key: k8sPodUIDAttrKey,
DataType: v3.AttributeKeyDataTypeString,
Type: v3.AttributeKeyTypeResource,
},
},
Expression: "J",
// Last + AnyLast: report the most recent value in the window rather than an
// average over it.
ReduceTo: v3.ReduceToOperatorLast,
TimeAggregation: v3.TimeAggregationAnyLast,
// Count the series that match the filter instead of adding up their values.
SpaceAggregation: v3.SpaceAggregationCount,
Disabled: false,
},
// pod phase failed
// Query "K" reads the metric named by metricNamesForPods["pod_phase"], keeps
// only data matching the `__value = 4` filter, and counts the matching series
// per pod UID.
// NOTE(review): presumably `__value` filters on the sample value — confirm
// against the metrics query builder.
"K": {
QueryName: "K",
DataSource: v3.DataSourceMetrics,
AggregateAttribute: v3.AttributeKey{
Key: metricNamesForPods["pod_phase"],
DataType: v3.AttributeKeyDataTypeFloat64,
},
Temporality: v3.Unspecified,
Filters: &v3.FilterSet{
Operator: "AND",
Items: []v3.FilterItem{
{
Key: v3.AttributeKey{
Key: "__value",
},
Operator: v3.FilterOperatorEqual,
// 4 is the phase value this query treats as "failed".
Value: 4,
},
},
},
GroupBy: []v3.AttributeKey{
{
Key: k8sPodUIDAttrKey,
DataType: v3.AttributeKeyDataTypeString,
Type: v3.AttributeKeyTypeResource,
},
},
Expression: "K",
// Last + AnyLast: report the most recent value in the window rather than an
// average over it.
ReduceTo: v3.ReduceToOperatorLast,
TimeAggregation: v3.TimeAggregationAnyLast,
// Count the series that match the filter instead of adding up their values.
SpaceAggregation: v3.SpaceAggregationCount,
Disabled: false,
},
},
PanelType: v3.PanelTypeTable,
QueryType: v3.QueryTypeBuilder,
Expand Down
Loading

0 comments on commit 77420b9

Please sign in to comment.