From 81e010f87680e3566a8946a4aa6a2ac4de1b0fe7 Mon Sep 17 00:00:00 2001 From: zhengya Date: Wed, 17 Jan 2024 16:04:45 +0800 Subject: [PATCH] [CONTROLLER/PROMETHEUS] fixes metric label synced_at bug --- cli/ctl/prometheus.go | 2 +- server/controller/http/router/prometheus.go | 6 ------ server/controller/prometheus/cache/cache.go | 14 ++++++++++++-- server/controller/prometheus/cache/metric_label.go | 2 +- server/controller/prometheus/cleaner.go | 2 +- server/server.yaml | 2 +- 6 files changed, 16 insertions(+), 12 deletions(-) diff --git a/cli/ctl/prometheus.go b/cli/ctl/prometheus.go index abab164f7cb..5dfe3ec65dc 100644 --- a/cli/ctl/prometheus.go +++ b/cli/ctl/prometheus.go @@ -55,7 +55,7 @@ func RegisterPrometheusCommand() *cobra.Command { clearCmd := &cobra.Command{ Use: "clear", Short: "clear prometheus data in MySQL by deepflow-server, use with caution and not frequently!", - Example: "deepflow-ctl prometheus clear \"2006-01-02 15:04:05\"", + Example: "deepflow-ctl prometheus clear -e \"2006-01-02 15:04:05\"", Run: func(cmd *cobra.Command, args []string) { prometheusClear(cmd, expiredAt) }, diff --git a/server/controller/http/router/prometheus.go b/server/controller/http/router/prometheus.go index 1cdb545b00b..699c52bd840 100644 --- a/server/controller/http/router/prometheus.go +++ b/server/controller/http/router/prometheus.go @@ -42,16 +42,13 @@ func (p *Prometheus) RegisterTo(e *gin.Engine) { func createPrometheusCleanTask(c *gin.Context) { body := make(map[string]interface{}) err := c.ShouldBindBodyWith(&body, binding.JSON) - log.Errorf("body: %v", body) if err != nil { - log.Errorf("body: %v", err) routercommon.JsonResponse(c, body, err) return } isMaster, masterCtrlIP, httpPort, _, err := common.CheckSelfAndGetMasterControllerHostPort() if err != nil { - log.Errorf("body: %v", err) routercommon.JsonResponse(c, body, err) return } @@ -60,16 +57,13 @@ func createPrometheusCleanTask(c *gin.Context) { if e, ok := body["EXPIRED_AT"]; ok { expiredAt, err = time.Parse(common.GO_BIRTHDAY, e.(string)) if err != nil { - log.Errorf("body: %v", err) routercommon.JsonResponse(c, body, err) return } } - log.Errorf("body: %v", body) err = prometheus.GetCleaner().Clear(expiredAt) } else { _, err = common.CURLPerform(http.MethodPost, fmt.Sprintf("http://%s:%d/v1/prometheus-cleaner-tasks/", masterCtrlIP, httpPort), body) - log.Errorf("body: %v", err) } routercommon.JsonResponse(c, body, err) } diff --git a/server/controller/prometheus/cache/cache.go b/server/controller/prometheus/cache/cache.go index 0542e04cb60..289d7acb401 100644 --- a/server/controller/prometheus/cache/cache.go +++ b/server/controller/prometheus/cache/cache.go @@ -185,12 +185,17 @@ func GetDebugCache(t controller.PrometheusCacheType) []byte { getMetricAndAppLabelLayout := func() { temp := map[string]interface{}{ "layout_key_to_index": make(map[string]interface{}), + "layout_key_to_id": make(map[string]int), } tempCache.MetricAndAPPLabelLayout.layoutKeyToIndex.Range(func(key, value any) bool { temp["layout_key_to_index"].(map[string]interface{})[marshal(key)] = value return true }) - if len(temp["layout_key_to_index"].(map[string]interface{})) > 0 { + for iter := range tempCache.MetricAndAPPLabelLayout.layoutKeyToID.Iter() { + temp["layout_key_to_id"].(map[string]int)[iter.Key.String()] = iter.Val + } + if len(temp["layout_key_to_index"].(map[string]interface{})) > 0 || + len(temp["layout_key_to_id"].(map[string]int)) > 0 { content["metric_and_app_label_layout"] = temp } } @@ -237,14 +242,19 @@ func GetDebugCache(t controller.PrometheusCacheType) []byte { getMetricLabel := func() { temp := map[string]interface{}{ "metric_name_id_to_label_ids": make(map[int][]int), + "metric_label_key_to_id": make(map[string]int), } tempCache.MetricLabel.metricNameIDToLabelIDs.Range(func(i int, s mapset.Set[int]) bool { temp["metric_name_id_to_label_ids"].(map[int][]int)[i] = s.ToSlice() return true }) + for iter := range tempCache.MetricLabel.keyToID.Iter() { + temp["metric_label_key_to_id"].(map[string]int)[iter.Key.String()] = iter.Val + } - if len(temp["metric_name_id_to_label_ids"].(map[int][]int)) > 0 { + if len(temp["metric_name_id_to_label_ids"].(map[int][]int)) > 0 || + len(temp["metric_label_key_to_id"].(map[string]int)) > 0 { content["metric_label"] = temp } } diff --git a/server/controller/prometheus/cache/metric_label.go b/server/controller/prometheus/cache/metric_label.go index 220f4ed2185..c8442eaea52 100644 --- a/server/controller/prometheus/cache/metric_label.go +++ b/server/controller/prometheus/cache/metric_label.go @@ -123,6 +123,6 @@ func (ml *metricLabel) refresh(args ...interface{}) error { func (ml *metricLabel) load() ([]*mysql.PrometheusMetricLabel, error) { var metricLabels []*mysql.PrometheusMetricLabel - err := mysql.Db.Select("metric_name", "label_id").Find(&metricLabels).Error + err := mysql.Db.Select("metric_name", "label_id", "id").Find(&metricLabels).Error return metricLabels, err } diff --git a/server/controller/prometheus/cleaner.go b/server/controller/prometheus/cleaner.go index 6c3ea7e7d36..06e9b2779e1 100644 --- a/server/controller/prometheus/cleaner.go +++ b/server/controller/prometheus/cleaner.go @@ -91,7 +91,7 @@ func (c *Cleaner) Stop() { } func (c *Cleaner) Clear(expiredAt time.Time) error { - log.Infof("prometheus data cleaner clear by hand started") + log.Infof("prometheus data cleaner clear by hand") return c.clear(expiredAt) } diff --git a/server/server.yaml b/server/server.yaml index 6be2fee4a1c..9e9fb020723 100644 --- a/server/server.yaml +++ b/server/server.yaml @@ -290,7 +290,7 @@ controller: # encoder cache refresh interval, unit: second encoder_cache_refresh_interval: 3600 # time interval for regularly clearing prometheus expired data, unit: hour - # time interval should be less than or equal to ingester: prometheus-label-cache-expiration configuration + # time interval should be greater than or equal to ingester: prometheus-label-cache-expiration configuration data_clean_interval: 24 querier: