Skip to content

Commit

Permalink
Add support to record quantiles
Browse files Browse the repository at this point in the history
  • Loading branch information
wpjunior committed Oct 16, 2019
1 parent abd0a01 commit 4ee344a
Show file tree
Hide file tree
Showing 2 changed files with 203 additions and 13 deletions.
69 changes: 56 additions & 13 deletions slo/slo.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package slo

import (
"fmt"
"log"
"strings"

Expand All @@ -9,6 +10,24 @@ import (
"github.com/prometheus/prometheus/pkg/rulefmt"
)

// quantiles lists the latency percentiles for which quantile recording rules
// are generated. name becomes part of the recorded series name and quantile is
// the numeric value substituted for the "$quantile" placeholder.
var quantiles = []struct {
	name     string
	quantile float64
}{
	{name: "p50", quantile: 0.5},
	{name: "p95", quantile: 0.95},
	{name: "p99", quantile: 0.99},
}

// SLOSpec holds a list of SLO definitions.
type SLOSpec struct {
SLOS []SLO
}
Expand All @@ -23,17 +42,23 @@ func (block *ExprBlock) ComputeExpr(window, le string) string {
return replacer.Replace(block.Expr)
}

// ComputeQuantile renders the block's expression template for a single
// quantile. Every "$window" placeholder is replaced by window and every
// "$quantile" placeholder by quantile formatted with the %g verb (for example
// 0.5 becomes "0.5"). The template itself is left untouched.
func (block *ExprBlock) ComputeQuantile(window string, quantile float64) string {
	quantileText := fmt.Sprintf("%g", quantile)
	return strings.NewReplacer(
		"$window", window,
		"$quantile", quantileText,
	).Replace(block.Expr)
}

// SLO describes the service-level objective definition for one service as it
// is read from the YAML spec.
type SLO struct {
	// Name identifies the service. When HonorLabels is false it is written to
	// the "service" label of the generated recording rules.
	Name       string `yaml:"name"`
	Objectives Objectives

	// HonorLabels, when true, stops the generator from adding the "service"
	// label to the generated recording rules.
	HonorLabels bool `yaml:"honorLabels"`

	// Expression templates for the generated recording rules.
	TrafficRateRecord ExprBlock `yaml:"trafficRateRecord"`
	ErrorRateRecord   ExprBlock `yaml:"errorRateRecord"`
	LatencyRecord     ExprBlock `yaml:"latencyRecord"`
	// LatencyQuantileRecord is the template for the latency quantile rules. It
	// may use the "$quantile" and "$window" placeholders; quantile rules are
	// only generated when its Expr is non-empty.
	LatencyQuantileRecord ExprBlock `yaml:"latencyQuantileRecord"`

	// Labels and Annotations are extra key/value pairs from the spec; how they
	// are applied is decided by code outside this declaration.
	Labels      map[string]string `yaml:"labels"`
	Annotations map[string]string `yaml:"annotations"`
}

type Objectives struct {
Expand Down Expand Up @@ -113,17 +138,35 @@ func (slo SLO) generateRules(bucket string) []rulefmt.Rule {
rules = append(rules, trafficRateRecord)
}

errorRateRecord := rulefmt.Rule{
Record: "slo:service_errors_total:ratio_rate_" + bucket,
Expr: slo.ErrorRateRecord.ComputeExpr(bucket, ""),
Labels: map[string]string{},
}
if slo.ErrorRateRecord.Expr != "" {
errorRateRecord := rulefmt.Rule{
Record: "slo:service_errors_total:ratio_rate_" + bucket,
Expr: slo.ErrorRateRecord.ComputeExpr(bucket, ""),
Labels: map[string]string{},
}

if !slo.HonorLabels {
errorRateRecord.Labels["service"] = slo.Name
if !slo.HonorLabels {
errorRateRecord.Labels["service"] = slo.Name
}

rules = append(rules, errorRateRecord)
}

rules = append(rules, errorRateRecord)
if slo.LatencyQuantileRecord.Expr != "" {
for _, quantile := range quantiles {
latencyQuantileRecord := rulefmt.Rule{
Record: "slo:service_latency:" + quantile.name + "_" + bucket,
Expr: slo.LatencyQuantileRecord.ComputeQuantile(bucket, quantile.quantile),
Labels: map[string]string{},
}

if !slo.HonorLabels {
latencyQuantileRecord.Labels["service"] = slo.Name
}

rules = append(rules, latencyQuantileRecord)
}
}

for _, latencyBucket := range slo.Objectives.Latency {
latencyRateRecord := rulefmt.Rule{
Expand Down
147 changes: 147 additions & 0 deletions slo/slo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,153 @@ func TestSLOGenerateGroupRules(t *testing.T) {
})
}

// TestSLOGenerateGroupRulesWithLatencyQuantile verifies that an SLO that only
// defines a latency quantile record yields three rule groups (short, medium,
// daily), each holding p50/p95/p99 recording rules for every window of the
// group, with no extra labels because HonorLabels is set.
func TestSLOGenerateGroupRulesWithLatencyQuantile(t *testing.T) {
	slo := &SLO{
		Name:        "auto-discover-services",
		HonorLabels: true,
		LatencyQuantileRecord: ExprBlock{
			Expr: "histogram_quantile($quantile, sum by (le) (rate(http_total[$window])))",
		},
	}

	// quantileRules builds the expected rules: for each window, in order, one
	// rule per percentile (p50, p95, p99), each with its own empty label map.
	quantileRules := func(windows ...string) []rulefmt.Rule {
		percentiles := []struct{ name, value string }{
			{"p50", "0.5"},
			{"p95", "0.95"},
			{"p99", "0.99"},
		}

		rules := make([]rulefmt.Rule, 0, len(windows)*len(percentiles))
		for _, window := range windows {
			for _, p := range percentiles {
				rules = append(rules, rulefmt.Rule{
					Record: "slo:service_latency:" + p.name + "_" + window,
					Expr:   "histogram_quantile(" + p.value + ", sum by (le) (rate(http_total[" + window + "])))",
					Labels: map[string]string{},
				})
			}
		}
		return rules
	}

	expectedGroups := []rulefmt.RuleGroup{
		{
			Name:     "slo:auto-discover-services:short",
			Interval: model.Duration(time.Second * 30),
			Rules:    quantileRules("5m", "30m", "1h"),
		},
		{
			Name:     "slo:auto-discover-services:medium",
			Interval: model.Duration(time.Second * 120),
			Rules:    quantileRules("2h", "6h"),
		},
		{
			Name:     "slo:auto-discover-services:daily",
			Interval: model.Duration(time.Second * 300),
			Rules:    quantileRules("1d", "3d"),
		},
	}

	groupRules := slo.GenerateGroupRules()
	assert.Len(t, groupRules, 3)

	for i, expected := range expectedGroups {
		assert.Equal(t, expected, groupRules[i])
	}
}

func TestSLOGenerateGroupRulesWithAutoDiscovery(t *testing.T) {
slo := &SLO{
Name: "auto-discover-services",
Expand Down

0 comments on commit 4ee344a

Please sign in to comment.