diff --git a/info/v1/container.go b/info/v1/container.go index 24287d0380..ae1d9caecc 100644 --- a/info/v1/container.go +++ b/info/v1/container.go @@ -333,6 +333,8 @@ type CpuStats struct { // Load is smoothed over the last 10 seconds. Instantaneous value can be read // from LoadStats.NrRunning. LoadAverage int32 `json:"load_average"` + // LoadDAverage is the smoothed average of LoadStats.NrUninterruptible, in milliLoad (scaled by 1000). + LoadDAverage int32 `json:"load_d_average"` } type PerDiskStats struct { diff --git a/manager/container.go b/manager/container.go index cf868953c9..4efbfda191 100644 --- a/manager/container.go +++ b/manager/container.go @@ -73,6 +73,7 @@ type containerData struct { loadReader cpuload.CpuLoadReader summaryReader *summary.StatsSummary loadAvg float64 // smoothed load average seen so far. + loadDAvg float64 // smoothed load.d average seen so far. housekeepingInterval time.Duration maxHousekeepingInterval time.Duration allowDynamicHousekeeping bool @@ -441,6 +442,7 @@ func newContainerData(containerName string, memoryCache *memory.InMemoryCache, h allowDynamicHousekeeping: allowDynamicHousekeeping, logUsage: logUsage, loadAvg: -1.0, // negative value indicates uninitialized. + loadDAvg: -1.0, // negative value indicates uninitialized. stop: make(chan struct{}), collectorManager: collectorManager, onDemandChan: make(chan chan struct{}, 100), @@ -633,6 +635,14 @@ func (cd *containerData) updateLoad(newLoad uint64) { } } +func (cd *containerData) updateLoadD(newLoad uint64) { + if cd.loadDAvg < 0 { + cd.loadDAvg = float64(newLoad) // initialize to the first seen sample for faster stabilization. + } else { + cd.loadDAvg = cd.loadDAvg*cd.loadDecay + float64(newLoad)*(1.0-cd.loadDecay) + } +} + func (cd *containerData) updateStats() error { stats, statsErr := cd.handler.GetStats() if statsErr != nil { @@ -659,6 +669,10 @@ func (cd *containerData) updateStats() error { cd.updateLoad(loadStats.NrRunning) // convert to 'milliLoad' to avoid floats and preserve precision. 
stats.Cpu.LoadAverage = int32(cd.loadAvg * 1000) + + cd.updateLoadD(loadStats.NrUninterruptible) + // convert to 'milliLoad' to avoid floats and preserve precision. + stats.Cpu.LoadDAverage = int32(cd.loadDAvg * 1000) } } if cd.summaryReader != nil { diff --git a/metrics/prometheus.go b/metrics/prometheus.go index 5dac3e116c..86064819d3 100644 --- a/metrics/prometheus.go +++ b/metrics/prometheus.go @@ -270,6 +270,13 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri getValues: func(s *info.ContainerStats) metricValues { return metricValues{{value: float64(s.Cpu.LoadAverage), timestamp: s.Timestamp}} }, + }, { + name: "container_cpu_load_d_average_10s", + help: "Value of container cpu load.d average over the last 10 seconds.", + valueType: prometheus.GaugeValue, + getValues: func(s *info.ContainerStats) metricValues { + return metricValues{{value: float64(s.Cpu.LoadDAverage), timestamp: s.Timestamp}} + }, }, { name: "container_tasks_state", help: "Number of tasks in given state", diff --git a/metrics/prometheus_fake.go b/metrics/prometheus_fake.go index 8ee7685d76..fd43b78148 100644 --- a/metrics/prometheus_fake.go +++ b/metrics/prometheus_fake.go @@ -326,7 +326,8 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req RunqueueTime: 479424566378, RunPeriods: 984285, }, - LoadAverage: 2, + LoadAverage: 2, + LoadDAverage: 2, }, Memory: info.MemoryStats{ Usage: 8, diff --git a/metrics/testdata/prometheus_metrics b/metrics/testdata/prometheus_metrics index aa1d01c1d0..a385e50689 100644 --- a/metrics/testdata/prometheus_metrics +++ b/metrics/testdata/prometheus_metrics @@ -21,6 +21,9 @@ container_cpu_cfs_throttled_seconds_total{container_env_foo_env="prod",container # HELP container_cpu_load_average_10s Value of container cpu load average over the last 10 seconds. 
# TYPE container_cpu_load_average_10s gauge container_cpu_load_average_10s{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2 1395066363000 +# HELP container_cpu_load_d_average_10s Value of container cpu load.d average over the last 10 seconds. +# TYPE container_cpu_load_d_average_10s gauge +container_cpu_load_d_average_10s{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2 1395066363000 # HELP container_cpu_schedstat_run_periods_total Number of times processes of the cgroup have run on the cpu # TYPE container_cpu_schedstat_run_periods_total counter container_cpu_schedstat_run_periods_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 984285 1395066363000 diff --git a/metrics/testdata/prometheus_metrics_whitelist_filtered b/metrics/testdata/prometheus_metrics_whitelist_filtered index e8a1986a77..921b2e1106 100644 --- a/metrics/testdata/prometheus_metrics_whitelist_filtered +++ b/metrics/testdata/prometheus_metrics_whitelist_filtered @@ -21,6 +21,9 @@ container_cpu_cfs_throttled_seconds_total{container_env_foo_env="prod",id="testc # HELP container_cpu_load_average_10s Value of container cpu load average over the last 10 seconds. # TYPE container_cpu_load_average_10s gauge container_cpu_load_average_10s{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2 1395066363000 +# HELP container_cpu_load_d_average_10s Value of container cpu load.d average over the last 10 seconds. 
+# TYPE container_cpu_load_d_average_10s gauge +container_cpu_load_d_average_10s{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2 1395066363000 # HELP container_cpu_schedstat_run_periods_total Number of times processes of the cgroup have run on the cpu # TYPE container_cpu_schedstat_run_periods_total counter container_cpu_schedstat_run_periods_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 984285 1395066363000