Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify interpretation of produced metrics #23

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
3 changes: 3 additions & 0 deletions cmd/kube-transition-metrics/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
//nolint:gosec
_ "net/http/pprof"
"os"
"time"

"github.com/BackMarket-oss/kube-transition-metrics/internal/options"
"github.com/BackMarket-oss/kube-transition-metrics/internal/prommetrics"
Expand Down Expand Up @@ -65,6 +66,8 @@ func main() {

options := options.Parse()
zerolog.SetGlobalLevel(options.LogLevel)
zerolog.DurationFieldInteger = false
zerolog.DurationFieldUnit = time.Second

config := getKubeconfig(options)
clientset, err := kubernetes.NewForConfig(config)
Expand Down
51 changes: 43 additions & 8 deletions internal/statistics/container_statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ type containerStatistic struct {
pod *podStatistic
imagePull imagePullStatistic

// Previous init container, null if first init container or non-init container
previous *containerStatistic

// The timestamp for when the container first turned Running.
runningTimestamp time.Time

Expand Down Expand Up @@ -46,21 +49,53 @@ func (cs containerStatistic) logger() zerolog.Logger {
Logger()
}

func (cs containerStatistic) appendInitFields(event *zerolog.Event) {
if !cs.runningTimestamp.IsZero() && cs.previous != nil && !cs.previous.readyTimestamp.IsZero() {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cs.runningTimeStamp is always != nil ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, it's a time.Time not a pointer to time.Time so it's the time.Time zero value until set, which is—I believe—the unix epoch.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So yes technically this code won't work before 1970

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(it was a joke, happy to see that someone read my comment 😀 )

event.Dur("previous_to_running_seconds", cs.runningTimestamp.Sub(cs.previous.readyTimestamp))
}
}

func (cs containerStatistic) appendNonInitFields(event *zerolog.Event) {
if !cs.runningTimestamp.IsZero() && !cs.pod.initializingTimestamp.IsZero() {
event.Dur("initialized_to_running_seconds", cs.runningTimestamp.Sub(cs.pod.initializingTimestamp))
}
}

func (cs containerStatistic) event() *zerolog.Event {
event := zerolog.Dict()

event.Bool("init_container", cs.initContainer)
if cs.initContainer {
cs.appendInitFields(event)
} else {
cs.appendNonInitFields(event)
}

if !cs.runningTimestamp.IsZero() {
event.Time("running_timestamp", cs.runningTimestamp)
}
if !cs.startedTimestamp.IsZero() {
event.Float64("started_latency",
cs.startedTimestamp.Sub(cs.pod.creationTimestamp).Seconds())
event.Time("started_timestamp", cs.startedTimestamp)
if !cs.runningTimestamp.IsZero() {
event.Dur("running_to_started_seconds", cs.startedTimestamp.Sub(cs.runningTimestamp))
}
}
if !cs.readyTimestamp.IsZero() {
event.Float64("ready_latency",
cs.readyTimestamp.Sub(cs.pod.creationTimestamp).Seconds())
}
if !cs.runningTimestamp.IsZero() {
event.Float64("running_latency",
cs.runningTimestamp.Sub(cs.pod.creationTimestamp).Seconds())
event.Time("ready_timestamp", cs.readyTimestamp)

// As init containers do not supported startup, liveliness, or readiness probes the Started container status field is
// not set for init containers.
// Instead, readiness represents the time an init container has excited successfully,allowing the following containers
// to proceed.
Comment on lines +90 to +91
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Instead, readiness represents the time an init container has excited successfully,allowing the following containers
// to proceed.
// Instead, readiness represents the time until an init container has exited successfully, allowing the following
// containers to proceed.

// Given this, presenting both running_to_ready_seconds and started_to_ready_seconds is useful to cover the differing
// meanings for both container types.
// See: https://github.com/kubernetes/website/blob/b397a8f/content/en/docs/concepts/workloads/pods/init-containers.md
if !cs.runningTimestamp.IsZero() {
event.Dur("running_to_ready_seconds", cs.readyTimestamp.Sub(cs.runningTimestamp))
}
if !cs.startedTimestamp.IsZero() {
event.Dur("started_to_ready_seconds", cs.readyTimestamp.Sub(cs.runningTimestamp))
}
}

return event
Expand Down
10 changes: 5 additions & 5 deletions internal/statistics/image_pull_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,11 @@ func (ev imagePullingEvent) handle(statistic *podStatistic) bool {
}
imagePullStatistic := &containerStatistic.imagePull

if !imagePullStatistic.finishedAt.IsZero() {
if !imagePullStatistic.finishedTimestamp.IsZero() {
logger.Debug().Str("container_name", ev.containerName).
Msg("Skipping event for initialized pod")
} else if imagePullStatistic.startedAt.IsZero() {
imagePullStatistic.startedAt = ev.event.FirstTimestamp.Time
} else if imagePullStatistic.startedTimestamp.IsZero() {
imagePullStatistic.startedTimestamp = ev.event.FirstTimestamp.Time
}

return false
Expand Down Expand Up @@ -194,8 +194,8 @@ func (ev imagePulledEvent) handle(statistic *podStatistic) bool {
}
imagePullStatistic := &containerStatistic.imagePull

if imagePullStatistic.finishedAt.IsZero() {
imagePullStatistic.finishedAt = ev.event.LastTimestamp.Time
if imagePullStatistic.finishedTimestamp.IsZero() {
imagePullStatistic.finishedTimestamp = ev.event.LastTimestamp.Time
}

imagePullStatistic.log(ev.event.Message)
Expand Down
19 changes: 11 additions & 8 deletions internal/statistics/image_pull_statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,21 @@ import (
type imagePullStatistic struct {
container *containerStatistic

startedAt time.Time
finishedAt time.Time
startedTimestamp time.Time
finishedTimestamp time.Time
}

func (s imagePullStatistic) log(message string) {
metrics := zerolog.Dict()
metrics.Bool("init_container", s.container.initContainer)
if !s.finishedAt.IsZero() && !s.startedAt.IsZero() {
metrics.Float64(
"image_pull_duration",
s.finishedAt.Sub(s.startedAt).Seconds(),
)

if !s.startedTimestamp.IsZero() {
metrics.Time("started_timestamp", s.startedTimestamp)
}
if !s.finishedTimestamp.IsZero() {
metrics.Time("finished_timestamp", s.finishedTimestamp)
if !s.startedTimestamp.IsZero() {
metrics.Dur("duration_seconds", s.finishedTimestamp.Sub(s.startedTimestamp))
}
}

logger :=
Expand Down
41 changes: 24 additions & 17 deletions internal/statistics/pod_statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ type podStatistic struct {
creationTimestamp time.Time

// The timestamp for when the pod was scheduled.
scheduledTimestamp time.Time
initializingTimestamp time.Time

// The timestamp for when the pod was initialized.
initializedTimestamp time.Time
runningTimestamp time.Time

// The timestamp for when the pod first turned Ready.
readyTimestamp time.Time
Expand All @@ -46,8 +46,11 @@ func (s *podStatistic) initialize(pod *corev1.Pod) {
s.initContainers = make(map[string]*containerStatistic)
s.containers = make(map[string]*containerStatistic)

var previous *containerStatistic
for _, container := range pod.Spec.InitContainers {
s.initContainers[container.Name] = newContainerStatistic(s, true, container)
s.initContainers[container.Name].previous = previous
previous = s.initContainers[container.Name]
}
for _, container := range pod.Spec.Containers {
s.containers[container.Name] = newContainerStatistic(s, false, container)
Expand All @@ -64,20 +67,24 @@ func (s podStatistic) logger() zerolog.Logger {
func (s podStatistic) event() *zerolog.Event {
event := zerolog.Dict()

if !s.scheduledTimestamp.IsZero() {
event.Float64(
"scheduled_latency",
s.scheduledTimestamp.Sub(s.creationTimestamp).Seconds())
event.Time("creation_timestamp", s.creationTimestamp)
if !s.initializingTimestamp.IsZero() {
event.Time("initializing_timestamp", s.initializingTimestamp)
event.Dur("creation_to_initializing_seconds", s.initializingTimestamp.Sub(s.creationTimestamp))
}
if !s.initializedTimestamp.IsZero() {
event.Float64(
"initialized_latency",
s.initializedTimestamp.Sub(s.creationTimestamp).Seconds())
if !s.runningTimestamp.IsZero() {
event.Time("running_timestamp", s.runningTimestamp)
event.Dur("creation_to_running_seconds", s.runningTimestamp.Sub(s.creationTimestamp))
if !s.initializingTimestamp.IsZero() {
event.Dur("initializing_to_running_seconds", s.runningTimestamp.Sub(s.initializingTimestamp))
}
}
if !s.readyTimestamp.IsZero() {
event.Float64(
"ready_latency",
s.readyTimestamp.Sub(s.creationTimestamp).Seconds())
event.Time("ready_timestamp", s.readyTimestamp)
event.Dur("creation_to_ready_seconds", s.readyTimestamp.Sub(s.creationTimestamp))
if !s.runningTimestamp.IsZero() {
event.Dur("running_to_ready_seconds", s.readyTimestamp.Sub(s.runningTimestamp))
}
}

return event
Expand Down Expand Up @@ -112,16 +119,16 @@ func (s *podStatistic) update(pod *corev1.Pod) {
//nolint:exhaustive
switch condition.Type {
case corev1.PodScheduled:
if s.scheduledTimestamp.IsZero() {
s.scheduledTimestamp = condition.LastTransitionTime.Time
if s.initializingTimestamp.IsZero() {
s.initializingTimestamp = condition.LastTransitionTime.Time
}
case corev1.PodInitialized:
// Pod Initialized occursafter all images pulled, no need to continue to
// track

if s.initializedTimestamp.IsZero() {
if s.runningTimestamp.IsZero() {
go s.imagePullCollector.cancel("pod_initialized")
s.initializedTimestamp = condition.LastTransitionTime.Time
s.runningTimestamp = condition.LastTransitionTime.Time
}
case corev1.PodReady:
if s.readyTimestamp.IsZero() {
Expand Down
4 changes: 2 additions & 2 deletions internal/statistics/statistics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,9 @@ func TestPodStatisticUpdate(t *testing.T) {
// Update the pod statistic for the "new" state
stat.update(pod)

assert.NotZero(t, stat.scheduledTimestamp, "scheduledTimestamp was not set")
assert.NotZero(t, stat.initializingTimestamp, "scheduledTimestamp was not set")
assert.NotZero(
t, stat.initializedTimestamp, "initializedTimestamp was not set")
t, stat.runningTimestamp, "initializedTimestamp was not set")
assert.NotEmpty(t, stat.containers, "containers map was not populated")

// Check that the imagePullCollector would have been canceled for the right
Expand Down
Loading
Loading