Skip to content

Commit

Permalink
Add resources for creating ML managed alerts
Browse files Browse the repository at this point in the history
  • Loading branch information
csmarchbanks committed Jul 9, 2024
1 parent 6beb516 commit cf5e74e
Show file tree
Hide file tree
Showing 5 changed files with 315 additions and 1 deletion.
36 changes: 36 additions & 0 deletions examples/resources/grafana_machine_learning_job/alert.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Prometheus (Mimir) data source that the machine learning job below queries.
resource "grafana_data_source" "foo" {
  type                = "prometheus"
  name                = "prometheus-ds-test"
  uid                 = "prometheus-ds-test-uid"
  url                 = "https://my-instance.com"
  basic_auth_enabled  = true
  basic_auth_username = "username"

  # Non-secret data source settings, passed as a JSON document.
  json_data_encoded = jsonencode({
    httpMethod        = "POST"
    prometheusType    = "Mimir"
    prometheusVersion = "2.4.0"
  })

  # Secret settings (the basic-auth password), passed as a JSON document.
  secure_json_data_encoded = jsonencode({
    basicAuthPassword = "password"
  })
}

# Machine learning job that runs a PromQL query against the data source above.
resource "grafana_machine_learning_job" "test_job" {
  name            = "Test Job"
  metric          = "tf_test_job"
  datasource_type = "prometheus"
  datasource_uid  = grafana_data_source.foo.uid

  query_params = {
    expr = "grafanacloud_grafana_instance_active_user_count"
  }
}

# Alert attached to the job above.
resource "grafana_machine_learning_alert" "test_alert" {
  # "job" is a string attribute, so reference the job's ID rather than the
  # whole resource object.
  job              = grafana_machine_learning_job.test_job.id
  title            = "Test Job Alert"
  anomalyCondition = "any"
  threshold        = ">0.8"
  window           = "15m"
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ require (
github.com/grafana/amixr-api-go-client v0.0.12 // main branch
github.com/grafana/grafana-com-public-clients/go/gcom v0.0.0-20240322153219-42c6a1d2bcab
github.com/grafana/grafana-openapi-client-go v0.0.0-20240523010106-657d101fcbd9
github.com/grafana/machine-learning-go-client v0.7.0
github.com/grafana/machine-learning-go-client v0.7.1-0.20240703190259-47c2986b5066
github.com/grafana/slo-openapi-client/go v0.0.0-20240626093634-e6741482b090
github.com/grafana/synthetic-monitoring-agent v0.24.3
github.com/grafana/synthetic-monitoring-api-go-client v0.8.0
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ github.com/grafana/grafana-plugin-sdk-go v0.235.0 h1:UnZ/iBDvCkfDgwR94opi8trAWJX
github.com/grafana/grafana-plugin-sdk-go v0.235.0/go.mod h1:6n9LbrjGL3xAATntYVNcIi90G9BVHRJjzHKz5FXVfWw=
github.com/grafana/machine-learning-go-client v0.7.0 h1:yiRBg8rCNbHh9BURa+vtZ8ItRYvabbdYAtsAOfxoFPI=
github.com/grafana/machine-learning-go-client v0.7.0/go.mod h1:bKsLSJTreH7HXaL2FJnnrliMuP0L8XwMkXte6AgwFFg=
github.com/grafana/machine-learning-go-client v0.7.1-0.20240702191547-8b1f45f0374c h1:ebjonuo+s2BXsD/pq3slOZc4N8ZXf2Urfvj6dOFu+vg=
github.com/grafana/machine-learning-go-client v0.7.1-0.20240702191547-8b1f45f0374c/go.mod h1:9xRIoH6Y6RubuCPNjLfpckE/fLVe9dazg3HSLI1ARAU=
github.com/grafana/machine-learning-go-client v0.7.1-0.20240703190259-47c2986b5066 h1:eODY6pbanmIfNDC5tJ76zknDjbXAZS5OOMUXLSXjrBw=
github.com/grafana/machine-learning-go-client v0.7.1-0.20240703190259-47c2986b5066/go.mod h1:9xRIoH6Y6RubuCPNjLfpckE/fLVe9dazg3HSLI1ARAU=
github.com/grafana/otel-profiling-go v0.5.1 h1:stVPKAFZSa7eGiqbYuG25VcqYksR6iWvF3YH66t4qL8=
github.com/grafana/otel-profiling-go v0.5.1/go.mod h1:ftN/t5A/4gQI19/8MoWurBEtC6gFw8Dns1sJZ9W4Tls=
github.com/grafana/pyroscope-go/godeltaprof v0.1.7 h1:C11j63y7gymiW8VugJ9ZW0pWfxTZugdSJyC48olk5KY=
Expand Down
192 changes: 192 additions & 0 deletions internal/resources/machinelearning/resource_alert.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
package machinelearning

import (
	"context"
	"fmt"

	"github.com/grafana/machine-learning-go-client/mlapi"
	"github.com/grafana/terraform-provider-grafana/v3/internal/common"
	"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/validation"
	"github.com/prometheus/common/model"
)

// resourceAlertID describes the Terraform ID of a machine learning alert:
// a single string field named "id".
var resourceAlertID = common.NewResourceID(
	common.StringIDField("id"),
)

// resourceAlert builds the grafana_machine_learning_alert resource: its
// schema, its CRUD handlers (each wrapped in checkClient) and a passthrough
// importer, registered under the machine learning category.
//
// NOTE(review): the "anomalyCondition" and "noDataState" keys are camelCase.
// Terraform attribute names are conventionally snake_case and the plugin
// SDK's internal validation is expected to reject other spellings — confirm,
// and if so rename them together with every d.Get/d.Set call and the example.
func resourceAlert() *common.Resource {
	// Named alertSchema so the local does not shadow the imported schema package.
	alertSchema := &schema.Resource{

		Description: `
An alert managed by Grafana Machine Learning that fires when a forecast job or an outlier detector reports anomalous values.
`,

		CreateContext: checkClient(resourceAlertCreate),
		ReadContext:   checkClient(resourceAlertRead),
		UpdateContext: checkClient(resourceAlertUpdate),
		DeleteContext: checkClient(resourceAlertDelete),
		Importer: &schema.ResourceImporter{
			StateContext: schema.ImportStatePassthroughContext,
		},

		Schema: map[string]*schema.Schema{
			"job": {
				Description:  "The forecast this alert belongs to.",
				Type:         schema.TypeString,
				Optional:     true,
				ForceNew:     true,
				ExactlyOneOf: []string{"job", "outlier"},
			},
			"outlier": {
				Description:  "The outlier detector this alert belongs to.",
				Type:         schema.TypeString,
				Optional:     true,
				ForceNew:     true,
				ExactlyOneOf: []string{"job", "outlier"},
			},
			"id": {
				Description: "The ID of the alert.",
				Type:        schema.TypeString,
				Computed:    true,
			},
			"title": {
				Description: "The title of the alert.",
				Type:        schema.TypeString,
				Required:    true,
			},
			"anomalyCondition": {
				Description:  "The condition for when to consider a point as anomalous.",
				Type:         schema.TypeString,
				Optional:     true,
				ValidateFunc: validation.StringInSlice([]string{"any", "low", "high"}, false),
			},
			"for": {
				Description: "How long values must be anomalous before firing an alert.",
				Type:        schema.TypeString,
				Optional:    true,
			},
			"threshold": {
				Description: "The threshold of points over the window that need to be anomalous to alert.",
				Type:        schema.TypeString,
				Optional:    true,
			},
			"window": {
				Description: "How much time to average values over.",
				Type:        schema.TypeString,
				Optional:    true,
			},
			"labels": {
				Description: "Labels to add to the alert generated in Grafana.",
				Type:        schema.TypeMap,
				Optional:    true,
				// Values are read back as strings, so state the element type explicitly.
				Elem: &schema.Schema{Type: schema.TypeString},
			},
			"annotations": {
				Description: "Annotations to add to the alert generated in Grafana.",
				Type:        schema.TypeMap,
				Optional:    true,
				Elem:        &schema.Schema{Type: schema.TypeString},
			},
			"noDataState": {
				Description:  "How the alert should be processed when no data is returned by the underlying series.",
				Type:         schema.TypeString,
				Optional:     true,
				ValidateFunc: validation.StringInSlice([]string{"Alerting", "NoData", "OK"}, false),
			},
		},
	}

	return common.NewLegacySDKResource(
		common.CategoryMachineLearning,
		"grafana_machine_learning_alert",
		resourceAlertID,
		alertSchema,
	)
}

// resourceAlertCreate creates the alert described by d on its parent job,
// stores the returned ID, and refreshes state through resourceAlertRead.
//
// The schema accepts either "job" or "outlier", but this handler only knows
// how to create alerts on a job. An outlier-only configuration is rejected up
// front instead of issuing an API request with an empty job ID.
func resourceAlertCreate(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
	c := meta.(*common.Client).MLAPI

	jobID := d.Get("job").(string)
	if jobID == "" {
		return diag.Errorf(`cannot create alert: "job" is not set; alerts on "outlier" are not supported by this resource yet`)
	}

	alert, err := makeMLAlert(d)
	if err != nil {
		return diag.FromErr(err)
	}
	alert, err = c.NewJobAlert(ctx, jobID, alert)
	if err != nil {
		return diag.FromErr(err)
	}
	d.SetId(alert.ID)
	return resourceAlertRead(ctx, d, meta)
}

// resourceAlertRead fetches the alert from its parent job and copies its
// fields into state. Lookup failures go through common.CheckReadError.
//
// NOTE(review): only "job" is consulted, so an alert that belongs to an
// outlier detector (or a freshly imported one, where "job" is not in state
// yet) is looked up with an empty job ID — confirm the intended behavior.
func resourceAlertRead(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
	c := meta.(*common.Client).MLAPI
	jobID := d.Get("job").(string)
	alert, err := c.JobAlert(ctx, jobID, d.Id())
	if diags, shouldReturn := common.CheckReadError("alert", d, err); shouldReturn {
		return diags
	}

	attrs := []struct {
		key   string
		value interface{}
	}{
		{"title", alert.Title},
		{"anomalyCondition", alert.AnomalyCondition},
		// Durations are stored as string attributes, so they have to be
		// rendered explicitly; a raw model.Duration is not accepted for a
		// string field.
		{"for", alertDurationAttr(d.Get("for").(string), alert.For)},
		{"threshold", alert.Threshold},
		{"window", alertDurationAttr(d.Get("window").(string), alert.Window)},
		{"labels", alert.Labels},
		{"annotations", alert.Annotations},
		{"noDataState", alert.NoDataState},
	}
	for _, attr := range attrs {
		// Surface state-write failures instead of silently dropping them.
		if err := d.Set(attr.key, attr.value); err != nil {
			return diag.FromErr(err)
		}
	}

	return nil
}

// alertDurationAttr renders remote for a string attribute while keeping the
// user's own spelling whenever it denotes the same duration (for example
// "60m" versus "1h", or an unset value versus zero), which avoids spurious
// diffs on refresh.
func alertDurationAttr(current string, remote model.Duration) string {
	if current == "" {
		if remote == 0 {
			return ""
		}
		return remote.String()
	}
	if parsed, err := model.ParseDuration(current); err == nil && parsed == remote {
		return current
	}
	return remote.String()
}

// resourceAlertUpdate pushes the alert built from d to the API for the job
// named in "job", then refreshes state via resourceAlertRead.
func resourceAlertUpdate(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
	client := meta.(*common.Client).MLAPI

	desired, buildErr := makeMLAlert(d)
	if buildErr != nil {
		return diag.FromErr(buildErr)
	}

	parentJob := d.Get("job").(string)
	if _, updateErr := client.UpdateJobAlert(ctx, parentJob, desired); updateErr != nil {
		return diag.FromErr(updateErr)
	}

	return resourceAlertRead(ctx, d, meta)
}

// resourceAlertDelete removes the alert from the job named in "job" and
// clears the resource ID once the API call succeeds.
func resourceAlertDelete(ctx context.Context, d *schema.ResourceData, meta interface{}) diag.Diagnostics {
	client := meta.(*common.Client).MLAPI
	parentJob := d.Get("job").(string)

	if deleteErr := client.DeleteJobAlert(ctx, parentJob, d.Id()); deleteErr != nil {
		return diag.FromErr(deleteErr)
	}

	d.SetId("")
	return nil
}

// makeMLAlert converts the resource data into an mlapi.Alert.
//
// "for" and "window" are optional attributes: when they are unset they are
// sent as zero durations instead of failing with an "empty duration" parse
// error. An error is returned only when a non-empty value is not a valid
// Prometheus-style duration.
func makeMLAlert(d *schema.ResourceData) (mlapi.Alert, error) {
	forClause, err := parseOptionalAlertDuration(d, "for")
	if err != nil {
		return mlapi.Alert{}, err
	}
	window, err := parseOptionalAlertDuration(d, "window")
	if err != nil {
		return mlapi.Alert{}, err
	}

	rawLabels := d.Get("labels").(map[string]interface{})
	labels := make(map[string]string, len(rawLabels))
	for k, v := range rawLabels {
		labels[k] = v.(string)
	}
	rawAnnotations := d.Get("annotations").(map[string]interface{})
	annotations := make(map[string]string, len(rawAnnotations))
	for k, v := range rawAnnotations {
		annotations[k] = v.(string)
	}

	return mlapi.Alert{
		ID:               d.Id(),
		Title:            d.Get("title").(string),
		AnomalyCondition: mlapi.AnomalyCondition(d.Get("anomalyCondition").(string)),
		For:              forClause,
		Threshold:        d.Get("threshold").(string),
		Window:           window,
		Labels:           labels,
		Annotations:      annotations,
		NoDataState:      mlapi.NoDataState(d.Get("noDataState").(string)),
	}, nil
}

// parseOptionalAlertDuration reads the string attribute key and parses it as
// a duration, treating an unset (empty) value as zero.
func parseOptionalAlertDuration(d *schema.ResourceData, key string) (model.Duration, error) {
	raw := d.Get(key).(string)
	if raw == "" {
		return 0, nil
	}
	parsed, err := model.ParseDuration(raw)
	if err != nil {
		return 0, fmt.Errorf("parsing %q duration %q: %w", key, raw, err)
	}
	return parsed, nil
}
82 changes: 82 additions & 0 deletions internal/resources/machinelearning/resource_alert_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package machinelearning_test

import (
"context"
"fmt"
"testing"

"github.com/grafana/machine-learning-go-client/mlapi"
"github.com/grafana/terraform-provider-grafana/v3/internal/common"
"github.com/grafana/terraform-provider-grafana/v3/internal/testutils"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/acctest"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource"
"github.com/hashicorp/terraform-plugin-sdk/v2/terraform"
)

// TestAccResourceJobAlert applies the alert example against a cloud instance
// and verifies that both the job and its alert exist, then that the alert,
// the job and the data source are gone after destroy.
func TestAccResourceJobAlert(t *testing.T) {
	testutils.CheckCloudInstanceTestsEnabled(t)

	randomJobName := acctest.RandomWithPrefix("Test Job")
	randomAlertName := acctest.RandomWithPrefix("Test Job Alert")

	var job mlapi.Job
	var alert mlapi.Alert
	resource.ParallelTest(t, resource.TestCase{
		ProtoV5ProviderFactories: testutils.ProtoV5ProviderFactories,
		CheckDestroy: resource.ComposeTestCheckFunc(
			testAccMLJobAlertCheckDestroy(&job, &alert),
			testAccMLJobCheckDestroy(&job),
			testAccDatasourceCheckDestroy(),
		),
		Steps: []resource.TestStep{
			{
				// NOTE(review): "Test Job" is a prefix of "Test Job Alert";
				// confirm the helper's replacement order cannot leave the
				// alert title half-replaced.
				Config: testutils.TestAccExampleWithReplace(t, "resources/grafana_machine_learning_job/alert.tf", map[string]string{
					"Test Job":       randomJobName,
					"Test Job Alert": randomAlertName,
				}),
				Check: resource.ComposeTestCheckFunc(
					testAccMLJobCheckExists("grafana_machine_learning_job.test_job", &job),
					// The address must match the resource type and name used in alert.tf.
					testAccMLJobAlertCheckExists("grafana_machine_learning_alert.test_alert", &job, &alert),
				),
			},
		},
	})
}

// testAccMLJobAlertCheckExists returns a check that looks up the resource rn
// in state, fetches the matching alert of job from the API, and stores it in
// alert for later checks.
func testAccMLJobAlertCheckExists(rn string, job *mlapi.Job, alert *mlapi.Alert) resource.TestCheckFunc {
	return func(s *terraform.State) error {
		rs, ok := s.RootModule().Resources[rn]
		if !ok {
			return fmt.Errorf("resource not found: %s\n %#v", rn, s.RootModule().Resources)
		}

		if rs.Primary.ID == "" {
			return fmt.Errorf("resource id not set")
		}

		client := testutils.Provider.Meta().(*common.Client).MLAPI
		gotAlert, err := client.JobAlert(context.Background(), job.ID, rs.Primary.ID)
		if err != nil {
			return fmt.Errorf("error getting alert: %w", err)
		}

		*alert = gotAlert

		return nil
	}
}

// testAccMLJobAlertCheckDestroy returns a check that fails if the alert
// captured in alert can still be fetched from job after destroy.
func testAccMLJobAlertCheckDestroy(job *mlapi.Job, alert *mlapi.Alert) resource.TestCheckFunc {
	return func(s *terraform.State) error {
		// This check is to make sure that no pointer conversions are incorrect
		// while mutating alert.
		if alert.ID == "" {
			return fmt.Errorf("checking deletion of empty id")
		}
		client := testutils.Provider.Meta().(*common.Client).MLAPI
		_, err := client.JobAlert(context.Background(), job.ID, alert.ID)
		if err == nil {
			return fmt.Errorf("alert still exists on server")
		}
		return nil
	}
}

0 comments on commit cf5e74e

Please sign in to comment.