diff --git a/dashboards/activemq/README.md b/dashboards/activemq/README.md index f64abd97da..f6c0ec8e6f 100644 --- a/dashboards/activemq/README.md +++ b/dashboards/activemq/README.md @@ -13,4 +13,4 @@ |ActiveMQ Prometheus Overview| |:------------------| |Filename: [activemq-prometheus-overview.json](activemq-prometheus-overview.json)| -|This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/prometheus/jmx_exporter). This dashboard has charts displaying: `Memory Usage`, `Store Usage`, `Temp Usage`, `Connections`, `Producers`, `Messages Waiting`, `Messages Enqueued`, `Messages Dequeued`,`Total Messages`,`Consumers`, `Average Wait Time`. | +|This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/prometheus/jmx_exporter). This dashboard has charts displaying: `Connections`, `Memory Usage`, `Store Usage`, `Temp Usage`, `Total Connections`, `Total Consumers`, `Total Producers`, `Total Messages`, `Total Messages Enqueued`, `Total Messages Dequeued`, `Topic Message Size`, `Topic Producers`, `Topic Consumers`, `Topic Dispatch Count`, `Topic Average Blocked Time`, `Topics Expired`, `Topic Queue Size`, `Topic Enqueues`, and `Topic Dequeues`. 
| diff --git a/dashboards/activemq/activemq-prometheus-overview.01.png b/dashboards/activemq/activemq-prometheus-overview.01.png index 138bfe6eea..5b90a8181d 100644 Binary files a/dashboards/activemq/activemq-prometheus-overview.01.png and b/dashboards/activemq/activemq-prometheus-overview.01.png differ diff --git a/dashboards/activemq/activemq-prometheus-overview.02.png b/dashboards/activemq/activemq-prometheus-overview.02.png index e4ce9c729a..d2bb9a7399 100644 Binary files a/dashboards/activemq/activemq-prometheus-overview.02.png and b/dashboards/activemq/activemq-prometheus-overview.02.png differ diff --git a/dashboards/activemq/activemq-prometheus-overview.json b/dashboards/activemq/activemq-prometheus-overview.json index 1a3cbdb1a9..2126ffcad4 100644 --- a/dashboards/activemq/activemq-prometheus-overview.json +++ b/dashboards/activemq/activemq-prometheus-overview.json @@ -1,5 +1,6 @@ { "category": "CUSTOM", + "displayName": "ActiveMQ Prometheus Overview", "dashboardFilters": [ { "filterType": "RESOURCE_LABEL", @@ -17,13 +18,80 @@ "templateVariable": "Namespace" } ], - "displayName": "ActiveMQ Prometheus Overview", - "labels": {}, "mosaicLayout": { - "columns": 12, + "columns": 48, "tiles": [ { - "height": 4, + "width": 12, + "height": 8, + "widget": { + "title": "Connections", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(activemq_connections{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 12, + "width": 12, + "height": 8, + "widget": { + "title": "Memory Usage", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(activemq_memory_usage_ratio{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 24, + "width": 12, + "height": 8, + "widget": { + "title": "Store Usage", + "scorecard": { + "blankView": {}, + 
"thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(activemq_store_usage_ratio{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 36, + "width": 12, + "height": 8, + "widget": { + "title": "Temp Usage", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(activemq_temp_usage_ratio{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "yPos": 8, + "width": 16, + "height": 16, "widget": { "title": "Total Connections", "xyChart": { @@ -32,25 +100,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "activemq_connections_total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "activemq_connections_total{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 0, - "yPos": 0 + } }, { - "height": 4, + "xPos": 16, + "yPos": 8, + "width": 16, + "height": 16, "widget": { "title": "Total Consumers", "xyChart": { @@ -59,25 +133,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "activemq_consumer_total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "activemq_consumer_total{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 4, - "yPos": 0 + } }, { - "height": 4, + "xPos": 32, + "yPos": 8, + "width": 16, + "height": 16, "widget": { "title": "Total Producers", "xyChart": { @@ -86,25 +166,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], 
"plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "activemq_producer_total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "activemq_producer_total{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 8, - "yPos": 0 + } }, { - "height": 4, + "yPos": 24, + "width": 16, + "height": 16, "widget": { "title": "Memory Usage", "xyChart": { @@ -113,25 +198,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "activemq_memory_usage_ratio{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "activemq_memory_usage_ratio{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 0, - "yPos": 4 + } }, { - "height": 4, + "xPos": 16, + "yPos": 24, + "width": 16, + "height": 16, "widget": { "title": "Store Usage", "xyChart": { @@ -140,25 +231,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "activemq_store_usage_ratio{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "activemq_store_usage_ratio{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 4, - "yPos": 4 + } }, { - "height": 4, + "xPos": 32, + "yPos": 24, + "width": 16, + "height": 16, "widget": { "title": "Temp Usage", "xyChart": { @@ -167,79 +264,334 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": 
"activemq_temp_usage_ratio{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "activemq_temp_usage_ratio{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 8, - "yPos": 4 + } }, { - "height": 4, + "yPos": 40, + "width": 16, + "height": 16, "widget": { - "title": "Topic Average Blocked Time", + "title": "Total Messages", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "activemq_topic_average_blocked_time{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "activemq_message_total{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 0, - "yPos": 8 + } }, { - "height": 4, + "xPos": 16, + "yPos": 40, + "width": 16, + "height": 16, "widget": { - "title": "Total Messages", + "title": "Total Messages Enqueued", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "activemq_message_total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "activemq_enqueue_total{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 4, - "yPos": 8 + } }, { + "xPos": 32, + "yPos": 40, + "width": 16, + "height": 16, + "widget": { + "title": "Total Messages Dequeued", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + 
"timeSeriesQuery": { + "prometheusQuery": "activemq_dequeue_total{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "yPos": 56, + "width": 48, "height": 4, + "widget": { + "title": "Topic", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 60, + "width": 16, + "height": 16, + "widget": { + "title": "Topic Message Size", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "activemq_topic_max_message_size{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + }, + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "activemq_topic_average_message_size{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + }, + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "activemq_topic_min_message_size{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 16, + "yPos": 60, + "width": 16, + "height": 16, + "widget": { + "title": "Topic Producers", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": 
"activemq_topic_producer_count{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 32, + "yPos": 60, + "width": 16, + "height": 16, + "widget": { + "title": "Topic Consumers", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "activemq_topic_consumer_count{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "yPos": 76, + "width": 16, + "height": 16, + "widget": { + "title": "Topic Dispatch Count", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "activemq_topic_dispatch_count{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 16, + "yPos": 76, + "width": 16, + "height": 16, + "widget": { + "title": "Topic Average Blocked Time", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "activemq_topic_average_blocked_time{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 32, + "yPos": 76, + "width": 16, + "height": 16, "widget": { "title": "Topics Expired", "xyChart": { @@ -248,77 
+600,125 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "activemq_topic_expired_count{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "activemq_topic_expired_count{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 8, - "yPos": 8 + } }, { - "height": 4, + "yPos": 92, + "width": 16, + "height": 16, "widget": { - "title": "Messages Enqueued", + "title": "Topic Queue Size", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "activemq_enqueue_total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "activemq_topic_queue_size{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 12 + } }, { - "height": 4, + "xPos": 16, + "yPos": 92, + "width": 16, + "height": 16, + "widget": { + "title": "Topic Enqueues", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "activemq_topic_enqueue_count{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 32, + "yPos": 92, + "width": 16, + "height": 16, "widget": { - "title": "Messages Dequeued", + "title": "Topic Dequeues", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], 
"plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "activemq_dequeue_total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "activemq_topic_dequeue_count{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 12 + } } ] - } + }, + "labels": {} } diff --git a/dashboards/activemq/metadata.yaml b/dashboards/activemq/metadata.yaml index f0d6edbc14..34073c374e 100644 --- a/dashboards/activemq/metadata.yaml +++ b/dashboards/activemq/metadata.yaml @@ -11,7 +11,7 @@ sample_dashboards: category: ActiveMQ id: activemq-prometheus-overview display_name: ActiveMQ Prometheus Overview - description: "This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/prometheus/jmx_exporter). This dashboard has charts displaying: Memory Usage, Store Usage, Temp Usage, Connections, Producers, Messages Waiting, Messages Enqueued, Messages Dequeued,Total Messages,Consumers, Average Wait Time." + description: "This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/prometheus/jmx_exporter). This dashboard has charts displaying: Connections, Memory Usage, Store Usage, Temp Usage, Total Connections, Total Consumers, Total Producers, Total Messages, Total Messages Enqueued, Total Messages Dequeued, Topic Message Size, Topic Producers, Topic Consumers, Topic Dispatch Count, Topic Average Blocked Time, Topics Expired, Topic Queue Size, Topic Enqueues, and Topic Dequeues." 
related_integrations: - id: activemq platform: GKE diff --git a/dashboards/couchdb/README.md b/dashboards/couchdb/README.md index 4ce33d852e..9b9f2b023f 100644 --- a/dashboards/couchdb/README.md +++ b/dashboards/couchdb/README.md @@ -14,4 +14,4 @@ |CouchDB Prometheus Overview| |:------------------| |Filename: [couchdb-prometheus-overview.json](couchdb-prometheus-overview.json)| -|This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/gesellix/couchdb-prometheus-exporter), including `Requests by Method`, `Responses by Status Code`, `Average Request Time`, `Bulk Requests`, `Views`, `Database Operations`, `Open Files Descriptors`, and `Open Databases`. | +|This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/gesellix/couchdb-prometheus-exporter), including `Service Status`, `Authentication Cache Hits`, `Authentication Cache Misses`, `Open Databases`, `Open File Descriptors`, `Responses by Status Code`, `Average Request Time`, `Requests by Method`, `Database Operations`, `Bulk Requests`, and `Views`. 
| diff --git a/dashboards/couchdb/couchdb-prometheus-overview.01.png b/dashboards/couchdb/couchdb-prometheus-overview.01.png deleted file mode 100644 index 091b16ac27..0000000000 Binary files a/dashboards/couchdb/couchdb-prometheus-overview.01.png and /dev/null differ diff --git a/dashboards/couchdb/couchdb-prometheus-overview.02.png b/dashboards/couchdb/couchdb-prometheus-overview.02.png deleted file mode 100644 index c837859de1..0000000000 Binary files a/dashboards/couchdb/couchdb-prometheus-overview.02.png and /dev/null differ diff --git a/dashboards/couchdb/couchdb-prometheus-overview.json b/dashboards/couchdb/couchdb-prometheus-overview.json index e37b6d5aaa..37536793a5 100644 --- a/dashboards/couchdb/couchdb-prometheus-overview.json +++ b/dashboards/couchdb/couchdb-prometheus-overview.json @@ -1,5 +1,5 @@ { - "category": "CUSTOM", + "displayName": "CouchDB Prometheus Overview", "dashboardFilters": [ { "filterType": "RESOURCE_LABEL", @@ -17,150 +17,273 @@ "templateVariable": "Namespace" } ], - "displayName": "CouchDB Prometheus Overview", - "labels": {}, "mosaicLayout": { - "columns": 12, + "columns": 48, "tiles": [ { - "height": 4, + "width": 16, + "height": 8, "widget": { - "title": "Requests by Method", + "title": "Service Status", + "scorecard": { + "thresholds": [ + { + "color": "RED", + "direction": "BELOW", + "label": "", + "value": 1 + } + ], + "timeSeriesQuery": { + "prometheusQuery": "couchdb_httpd_up{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + } + }, + { + "xPos": 16, + "width": 16, + "height": 8, + "widget": { + "title": "Authentication Cache Hits", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "couchdb_httpd_auth_cache_hits{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + } + }, + { + "xPos": 32, + "width": 16, + "height": 8, + "widget": { + "title": "Authentication Cache Misses", + "scorecard": { + "blankView": {}, + 
"thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "couchdb_httpd_auth_cache_misses{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + } + }, + { + "yPos": 8, + "width": 24, + "height": 16, + "widget": { + "title": "Open Databases", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "rate(couchdb_httpd_request_methods{${Cluster},${Location},${Namespace}}[5m])" + "prometheusQuery": "couchdb_httpd_open_databases{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 0 + } }, { - "height": 4, + "xPos": 24, + "yPos": 8, + "width": 24, + "height": 16, "widget": { - "title": "Average Request Time", + "title": "Open File Descriptors", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "avg(couchdb_httpd_request_time{${Cluster},${Location},${Namespace}})" + "prometheusQuery": "couchdb_httpd_open_os_files{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 4 + } }, { + "yPos": 24, + "width": 48, "height": 4, "widget": { - "title": "Views", + "title": "Performance", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 28, + "width": 24, + 
"height": 16, + "widget": { + "title": "Responses by Status Code", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "couchdb_httpd_view_reads{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "rate(couchdb_httpd_status_codes{${Cluster},${Location},${Namespace}}[5m])", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 8 + } }, { - "height": 4, + "xPos": 24, + "yPos": 28, + "width": 24, + "height": 16, "widget": { - "title": "Responses by Status Code", + "title": "Average Request Time", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "rate(couchdb_httpd_status_codes{${Cluster},${Location},${Namespace}}[5m])" + "prometheusQuery": "avg(couchdb_httpd_request_time{${Cluster},${Location},${Namespace}})", + "unitOverride": "" } } ], "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 0 + } }, { + "yPos": 44, + "width": 48, "height": 4, "widget": { - "title": "Bulk Requests", + "title": "Throughput", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 48, + "width": 24, + "height": 16, + "widget": { + "title": "Requests by Method", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": 
"Y1", "timeSeriesQuery": { - "prometheusQuery": "couchdb_httpd_bulk_requests{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "rate(couchdb_httpd_request_methods{${Cluster},${Location},${Namespace}}[5m])", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 4 + } }, { - "height": 4, + "xPos": 24, + "yPos": 48, + "width": 24, + "height": 16, "widget": { "title": "Database Operations", "xyChart": { @@ -169,84 +292,103 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "couchdb_httpd_database_reads{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "couchdb_httpd_database_reads{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } }, { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "couchdb_httpd_database_writes{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "couchdb_httpd_database_writes{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 8 + } }, { - "height": 4, + "yPos": 64, + "width": 24, + "height": 16, "widget": { - "title": "Open File Descriptors", + "title": "Bulk Requests", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "couchdb_httpd_open_os_files{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "couchdb_httpd_bulk_requests{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - 
"width": 6, - "xPos": 0, - "yPos": 12 + } }, { - "height": 4, + "xPos": 24, + "yPos": 64, + "width": 24, + "height": 16, "widget": { - "title": "Open Databases", + "title": "Views", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "couchdb_httpd_open_databases{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "couchdb_httpd_view_reads{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 12 + } } ] - } + }, + "labels": {} } diff --git a/dashboards/couchdb/couchdb-prometheus-overview.png b/dashboards/couchdb/couchdb-prometheus-overview.png new file mode 100644 index 0000000000..92ded8b7f2 Binary files /dev/null and b/dashboards/couchdb/couchdb-prometheus-overview.png differ diff --git a/dashboards/couchdb/metadata.yaml b/dashboards/couchdb/metadata.yaml index 3fb5a9813b..1449947cb6 100644 --- a/dashboards/couchdb/metadata.yaml +++ b/dashboards/couchdb/metadata.yaml @@ -11,7 +11,7 @@ sample_dashboards: category: CouchDB id: couchdb-prometheus-overview display_name: CouchDB Prometheus Overview - description: "This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/gesellix/couchdb-prometheus-exporter), including Requests by Method, Responses by Status Code, Average Request Time, Bulk Requests, Views, Database Operations, Open Files Descriptors, and Open Databases." 
+ description: "This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/gesellix/couchdb-prometheus-exporter), including Service Status, Authentication Cache Hits, Authentication Cache Misses, Open Databases, Open File Descriptors, Responses by Status Code, Average Request Time, Requests by Method, Database Operations, Bulk Requests, and Views." related_integrations: - id: couchdb platform: GKE diff --git a/dashboards/flink/README.md b/dashboards/flink/README.md index da774b628e..964369e790 100644 --- a/dashboards/flink/README.md +++ b/dashboards/flink/README.md @@ -19,9 +19,9 @@ |Flink Job Manager Prometheus Overview| |:------------------| |Filename: [flink-job-manager-prometheus.json](flink-job-manager-prometheus.json)| -|This dashboard is based on prometheus metrics exposed by [Apache Flink](https://nightlies.apache.org/flink/flink-docs-release-1.7/monitoring/metrics.html#prometheus-orgapacheflinkmetricsprometheusprometheusreporter). This dashboard has charts displaying: `Registered Task Managers`, `Running Jobs`, `Task Slots Available`, `Task Slots Total`, `CPU Load`, `CPU Time`, `Threads`, `Heap Used (bytes)`, `Heap Committed (bytes)`, `Heap Max (bytes)`, `Nonheap Used (bytes)`, `Nonheap Committed (bytes)`, `Nonheap Max (bytes)`, `Metaspace Used (bytes)`, `Metaspace Committed (bytes)`, `Metaspace Max (bytes)`, `Direct Used (bytes)`, `Direct Total Capacity (bytes)`, `Mapped Used (bytes)`, `Mapped Total Capacity (bytes)`, `Garbage Collection Time`, `Garbage Collection Count`, and `Class Loader`. | +|This dashboard is based on prometheus metrics exposed by [Apache Flink](https://nightlies.apache.org/flink/flink-docs-release-1.7/monitoring/metrics.html#prometheus-orgapacheflinkmetricsprometheusprometheusreporter). 
This dashboard has charts displaying: `Registered Task Managers`, `Running Jobs`, `Task Slots Available`, `Task Slots Total`, `JVM CPU Load`, `JVM CPU Time`, `JVM Threads`, `JVM Heap Used (bytes)`, `JVM Heap Committed (bytes)`, `JVM Heap Max (bytes)`, `JVM Nonheap Used (bytes)`, `JVM Nonheap Committed (bytes)`, `JVM Nonheap Max (bytes)`, `JVM Metaspace Used (bytes)`, `JVM Metaspace Committed (bytes)`, `JVM Metaspace Max (bytes)`, `JVM Direct Used (bytes)`, `JVM Direct Total Capacity (bytes)`, `JVM Mapped Used (bytes)`, `JVM Mapped Total Capacity (bytes)`, `JVM Garbage Collector Time`, `JVM Garbage Collector Count`, and `JVM Class Loader`. | |Flink Task Manager Prometheus Overview| |:------------------| |Filename: [flink-task-manager-prometheus.json](flink-task-manager-prometheus.json)| -|This dashboard is based on prometheus metrics exposed by [Apache Flink](https://nightlies.apache.org/flink/flink-docs-release-1.7/monitoring/metrics.html#prometheus-orgapacheflinkmetricsprometheusprometheusreporter). This dashboard has charts displaying: `CPU Load`, `CPU Time`, `Threads`, `Heap Used (bytes)`, `Heap Committed (bytes)`, `Heap Max (bytes)`, `Nonheap Used (bytes)`, `Nonheap Committed (bytes)`, `Nonheap Max (bytes)`, `Metaspace Used (bytes)`, `Metaspace Committed (bytes)`, `Metaspace Max (bytes)`, `Direct Used (bytes)`, `Direct Total Capacity (bytes)`, `Mapped Used (bytes)`, `Mapped Total Capacity (bytes)`, `Garbage Collection Time`, `Garbage Collection Count`, `Managed Memory Used`, `Managed Memory Total`, and `Class Loader`.| +|This dashboard is based on prometheus metrics exposed by [Apache Flink](https://nightlies.apache.org/flink/flink-docs-release-1.7/monitoring/metrics.html#prometheus-orgapacheflinkmetricsprometheusprometheusreporter). 
This dashboard has charts displaying: `Managed Memory Utilization %`, `JVM Heap Utilization %`, `JVM Nonheap Utilization %`, `JVM Metaspace Utilization %`, `Managed Memory Used`, `Managed Memory Total`, `JVM CPU Load`, `JVM CPU Time`, `JVM Threads`, `JVM Heap Used (bytes)`, `JVM Heap Committed (bytes)`, `JVM Heap Max (bytes)`, `JVM Nonheap Used (bytes)`, `JVM Nonheap Committed (bytes)`, `JVM Nonheap Max (bytes)`, `JVM Metaspace Used (bytes)`, `JVM Metaspace Committed (bytes)`, `JVM Metaspace Max (bytes)`, `JVM Direct Used (bytes)`, `JVM Direct Total Capacity (bytes)`, `JVM Mapped Used (bytes)`, `JVM Mapped Total Capacity (bytes)`, `JVM Garbage Collector Time`, `JVM Garbage Collector Count`, `JVM Class Loader`, `Netty Shuffle Available Memory (bytes)`, `Netty Shuffle Used Memory (bytes)`, `Netty Shuffle Total Memory (bytes)`, `Netty Shuffle Available Memory Segments`, `Netty Shuffle Used Memory Segments`, and `Netty Shuffle Total Memory Segments`. | diff --git a/dashboards/flink/flink-job-manager-prometheus.01.png b/dashboards/flink/flink-job-manager-prometheus.01.png index 36256f6893..681398c267 100644 Binary files a/dashboards/flink/flink-job-manager-prometheus.01.png and b/dashboards/flink/flink-job-manager-prometheus.01.png differ diff --git a/dashboards/flink/flink-job-manager-prometheus.02.png b/dashboards/flink/flink-job-manager-prometheus.02.png index fabe4c66e3..2c64fde994 100644 Binary files a/dashboards/flink/flink-job-manager-prometheus.02.png and b/dashboards/flink/flink-job-manager-prometheus.02.png differ diff --git a/dashboards/flink/flink-job-manager-prometheus.03.png b/dashboards/flink/flink-job-manager-prometheus.03.png index 3c2c00a2bf..2f1de51e5a 100644 Binary files a/dashboards/flink/flink-job-manager-prometheus.03.png and b/dashboards/flink/flink-job-manager-prometheus.03.png differ diff --git a/dashboards/flink/flink-job-manager-prometheus.json b/dashboards/flink/flink-job-manager-prometheus.json index d9df8323f9..408025b99f 100644 --- 
a/dashboards/flink/flink-job-manager-prometheus.json +++ b/dashboards/flink/flink-job-manager-prometheus.json @@ -1,5 +1,6 @@ { "category": "CUSTOM", + "displayName": "Apache Flink Job Manager Prometheus Overview", "dashboardFilters": [ { "filterType": "RESOURCE_LABEL", @@ -17,13 +18,13 @@ "templateVariable": "Namespace" } ], - "displayName": "Apache Flink Job Manager Prometheus Overview", - "labels": {}, "mosaicLayout": { - "columns": 12, + "columns": 48, "tiles": [ { - "height": 4, + "yPos": 8, + "width": 12, + "height": 16, "widget": { "title": "Registered Task Managers", "xyChart": { @@ -32,25 +33,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_numRegisteredTaskManagers{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_numRegisteredTaskManagers{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 3, - "xPos": 0, - "yPos": 0 + } }, { - "height": 4, + "xPos": 12, + "yPos": 8, + "width": 12, + "height": 16, "widget": { "title": "Running Jobs", "xyChart": { @@ -59,25 +66,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_numRunningJobs{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_numRunningJobs{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 3, - "xPos": 3, - "yPos": 0 + } }, { - "height": 4, + "xPos": 24, + "yPos": 8, + "width": 12, + "height": 16, "widget": { "title": "Task Slots Available", "xyChart": { @@ -86,25 +99,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + 
"measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_taskSlotsAvailable{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_taskSlotsAvailable{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 3, - "xPos": 6, - "yPos": 0 + } }, { - "height": 4, + "xPos": 36, + "yPos": 8, + "width": 12, + "height": 16, "widget": { "title": "Task Slots Total", "xyChart": { @@ -113,25 +132,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_taskSlotsTotal{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_taskSlotsTotal{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 3, - "xPos": 9, - "yPos": 0 + } }, { - "height": 4, + "yPos": 32, + "width": 16, + "height": 16, "widget": { "title": "CPU Load", "xyChart": { @@ -140,25 +164,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_CPU_Load{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_CPU_Load{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 0, - "yPos": 4 + } }, { - "height": 4, + "xPos": 16, + "yPos": 32, + "width": 16, + "height": 16, "widget": { "title": "CPU Time", "xyChart": { @@ -167,25 +197,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", 
"timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_CPU_Time{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_CPU_Time{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 4, - "yPos": 4 + } }, { - "height": 4, + "xPos": 32, + "yPos": 32, + "width": 16, + "height": 16, "widget": { "title": "Threads", "xyChart": { @@ -194,25 +230,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Threads_Count{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Threads_Count{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 8, - "yPos": 4 + } }, { - "height": 4, + "yPos": 52, + "width": 16, + "height": 16, "widget": { "title": "Heap Used (bytes)", "xyChart": { @@ -221,25 +262,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Heap_Used{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Heap_Used{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 0, - "yPos": 8 + } }, { - "height": 4, + "xPos": 16, + "yPos": 52, + "width": 16, + "height": 16, "widget": { "title": "Heap Committed (bytes)", "xyChart": { @@ -248,25 +295,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", 
"timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Heap_Committed{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Heap_Committed{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 4, - "yPos": 8 + } }, { - "height": 4, + "xPos": 32, + "yPos": 52, + "width": 16, + "height": 16, "widget": { "title": "Heap Max (bytes)", "xyChart": { @@ -275,25 +328,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Heap_Max{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Heap_Max{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 8, - "yPos": 8 + } }, { - "height": 4, + "yPos": 68, + "width": 16, + "height": 16, "widget": { "title": "Nonheap Used (bytes)", "xyChart": { @@ -302,25 +360,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_NonHeap_Used{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_NonHeap_Used{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 0, - "yPos": 12 + } }, { - "height": 4, + "xPos": 16, + "yPos": 68, + "width": 16, + "height": 16, "widget": { "title": "Nonheap Committed (bytes)", "xyChart": { @@ -329,25 +393,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], 
"plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_NonHeap_Committed{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_NonHeap_Committed{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 4, - "yPos": 12 + } }, { - "height": 4, + "xPos": 32, + "yPos": 68, + "width": 16, + "height": 16, "widget": { "title": "Nonheap Max (bytes)", "xyChart": { @@ -356,25 +426,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_NonHeap_Max{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_NonHeap_Max{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 8, - "yPos": 12 + } }, { - "height": 4, + "yPos": 84, + "width": 16, + "height": 16, "widget": { "title": "Metaspace Used (bytes)", "xyChart": { @@ -383,25 +458,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Metaspace_Used{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Metaspace_Used{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 0, - "yPos": 16 + } }, { - "height": 4, + "xPos": 16, + "yPos": 84, + "width": 16, + "height": 16, "widget": { "title": "Metaspace Committed (bytes)", "xyChart": { @@ -410,25 +491,31 @@ }, 
"dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Metaspace_Committed{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Metaspace_Committed{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 4, - "yPos": 16 + } }, { - "height": 4, + "xPos": 32, + "yPos": 84, + "width": 16, + "height": 16, "widget": { "title": "Metaspace Max (bytes)", "xyChart": { @@ -437,25 +524,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Metaspace_Max{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Metaspace_Max{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 8, - "yPos": 16 + } }, { - "height": 4, + "yPos": 100, + "width": 24, + "height": 16, "widget": { "title": "Direct Used (bytes)", "xyChart": { @@ -464,25 +556,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Direct_MemoryUsed{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Direct_MemoryUsed{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 20 + } }, { - "height": 4, + "xPos": 24, + "yPos": 100, + "width": 24, + "height": 16, "widget": 
{ "title": "Direct Total Capacity (bytes)", "xyChart": { @@ -491,25 +589,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Direct_TotalCapacity{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Direct_TotalCapacity{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 20 + } }, { - "height": 4, + "yPos": 116, + "width": 24, + "height": 16, "widget": { "title": "Mapped Used (bytes)", "xyChart": { @@ -518,25 +621,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Mapped_MemoryUsed{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Mapped_MemoryUsed{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 24 + } }, { - "height": 4, + "xPos": 24, + "yPos": 116, + "width": 24, + "height": 16, "widget": { "title": "Mapped Total Capacity (bytes)", "xyChart": { @@ -545,25 +654,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Mapped_TotalCapacity{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_Memory_Mapped_TotalCapacity{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, 
- "yPos": 24 + } }, { - "height": 4, + "yPos": 136, + "width": 24, + "height": 16, "widget": { "title": "Garbage Collector Time", "xyChart": { @@ -572,32 +686,42 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_GarbageCollector_G1_Old_Generation_Time{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_GarbageCollector_G1_Old_Generation_Time{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } }, { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_GarbageCollector_G1_Young_Generation_Time{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_GarbageCollector_G1_Young_Generation_Time{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 28 + } }, { - "height": 4, + "xPos": 24, + "yPos": 136, + "width": 24, + "height": 16, "widget": { "title": "Garbage Collector Count", "xyChart": { @@ -606,32 +730,41 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_GarbageCollector_G1_Old_Generation_Count{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_GarbageCollector_G1_Old_Generation_Count{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } }, { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_GarbageCollector_G1_Young_Generation_Count{${Cluster},${Location},${Namespace}}" + "prometheusQuery": 
"flink_jobmanager_Status_JVM_GarbageCollector_G1_Young_Generation_Count{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 28 + } }, { - "height": 4, + "yPos": 152, + "width": 48, + "height": 16, "widget": { "title": "Class Loader", "xyChart": { @@ -640,30 +773,223 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_ClassLoader_ClassesLoaded{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_ClassLoader_ClassesLoaded{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } }, { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_jobmanager_Status_JVM_ClassLoader_ClassesUnloaded{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_jobmanager_Status_JVM_ClassLoader_ClassesUnloaded{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 12, - "xPos": 0, - "yPos": 32 + } + }, + { + "yPos": 24, + "width": 48, + "height": 4, + "widget": { + "title": "Java Virtual Machine", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 28, + "width": 48, + "height": 4, + "widget": { + "title": "Processing", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_MEDIUM", + "horizontalAlignment": 
"H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 48, + "width": 48, + "height": 4, + "widget": { + "title": "Memory", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_MEDIUM", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 132, + "width": 48, + "height": 4, + "widget": { + "title": "Java Runtime Environment Components", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_MEDIUM", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "width": 8, + "height": 8, + "widget": { + "title": "Registered Task Managers", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(flink_jobmanager_numRegisteredTaskManagers{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 8, + "width": 8, + "height": 8, + "widget": { + "title": "Running Jobs", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(flink_jobmanager_numRunningJobs{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 16, + "width": 8, + "height": 8, + "widget": { + "title": "Task Slots Available", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(flink_jobmanager_taskSlotsAvailable{${Cluster},${Location},${Namespace}})", + "unitOverride": "" 
+ } + } + } + }, + { + "xPos": 24, + "width": 8, + "height": 8, + "widget": { + "title": "Task Slots Total", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(flink_jobmanager_taskSlotsTotal{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 32, + "width": 8, + "height": 8, + "widget": { + "title": "JVM CPU Load", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(flink_jobmanager_Status_JVM_CPU_Load{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 40, + "width": 8, + "height": 8, + "widget": { + "title": "JVM Threads", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(flink_jobmanager_Status_JVM_Threads_Count{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } } ] - } + }, + "labels": {} } diff --git a/dashboards/flink/flink-task-manager-prometheus.01.png b/dashboards/flink/flink-task-manager-prometheus.01.png index 3a5bedadbf..7e08a8051a 100644 Binary files a/dashboards/flink/flink-task-manager-prometheus.01.png and b/dashboards/flink/flink-task-manager-prometheus.01.png differ diff --git a/dashboards/flink/flink-task-manager-prometheus.02.png b/dashboards/flink/flink-task-manager-prometheus.02.png index ec416747ee..827efea175 100644 Binary files a/dashboards/flink/flink-task-manager-prometheus.02.png and b/dashboards/flink/flink-task-manager-prometheus.02.png differ diff --git a/dashboards/flink/flink-task-manager-prometheus.03.png b/dashboards/flink/flink-task-manager-prometheus.03.png index 7c30bcd709..5b94cf41e3 100644 Binary files a/dashboards/flink/flink-task-manager-prometheus.03.png and b/dashboards/flink/flink-task-manager-prometheus.03.png differ diff --git 
a/dashboards/flink/flink-task-manager-prometheus.json b/dashboards/flink/flink-task-manager-prometheus.json index 63756393f7..f6eab1836f 100644 --- a/dashboards/flink/flink-task-manager-prometheus.json +++ b/dashboards/flink/flink-task-manager-prometheus.json @@ -1,5 +1,6 @@ { "category": "CUSTOM", + "displayName": "Apache Flink Task Manager Prometheus Overview", "dashboardFilters": [ { "filterType": "RESOURCE_LABEL", @@ -17,13 +18,13 @@ "templateVariable": "Namespace" } ], - "displayName": "Apache Flink Task Manager Prometheus Overview", - "labels": {}, "mosaicLayout": { - "columns": 12, + "columns": 48, "tiles": [ { - "height": 4, + "yPos": 32, + "width": 16, + "height": 16, "widget": { "title": "CPU Load", "xyChart": { @@ -32,25 +33,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_CPU_Load{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_CPU_Load{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 0, - "yPos": 0 + } }, { - "height": 4, + "xPos": 16, + "yPos": 32, + "width": 16, + "height": 16, "widget": { "title": "CPU Time", "xyChart": { @@ -59,25 +66,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_CPU_Time{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_CPU_Time{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 4, - "yPos": 0 + } }, { - "height": 4, + "xPos": 32, + "yPos": 32, + "width": 16, + "height": 16, "widget": { 
"title": "Threads", "xyChart": { @@ -86,25 +99,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Threads_Count{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Threads_Count{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 8, - "yPos": 0 + } }, { - "height": 4, + "yPos": 52, + "width": 16, + "height": 16, "widget": { "title": "Heap Used (bytes)", "xyChart": { @@ -113,25 +131,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Heap_Used{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Heap_Used{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 0, - "yPos": 4 + } }, { - "height": 4, + "xPos": 16, + "yPos": 52, + "width": 16, + "height": 16, "widget": { "title": "Heap Committed (bytes)", "xyChart": { @@ -140,25 +164,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Heap_Committed{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Heap_Committed{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 4, - "yPos": 4 + } }, { - "height": 4, + "xPos": 32, + "yPos": 52, + "width": 16, + 
"height": 16, "widget": { "title": "Heap Max (bytes)", "xyChart": { @@ -167,25 +197,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Heap_Max{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Heap_Max{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 8, - "yPos": 4 + } }, { - "height": 4, + "yPos": 68, + "width": 16, + "height": 16, "widget": { "title": "Nonheap Used (bytes)", "xyChart": { @@ -194,25 +229,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_NonHeap_Used{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_NonHeap_Used{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 0, - "yPos": 8 + } }, { - "height": 4, + "xPos": 16, + "yPos": 68, + "width": 16, + "height": 16, "widget": { "title": "Nonheap Committed (bytes)", "xyChart": { @@ -221,25 +262,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_NonHeap_Committed{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_NonHeap_Committed{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 4, - "yPos": 8 + } }, { - 
"height": 4, + "xPos": 32, + "yPos": 68, + "width": 16, + "height": 16, "widget": { "title": "Nonheap Max (bytes)", "xyChart": { @@ -248,25 +295,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_NonHeap_Max{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_NonHeap_Max{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 8, - "yPos": 8 + } }, { - "height": 4, + "yPos": 84, + "width": 16, + "height": 16, "widget": { "title": "Metaspace Used (bytes)", "xyChart": { @@ -275,25 +327,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Metaspace_Used{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Metaspace_Used{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 0, - "yPos": 12 + } }, { - "height": 4, + "xPos": 16, + "yPos": 84, + "width": 16, + "height": 16, "widget": { "title": "Metaspace Committed (bytes)", "xyChart": { @@ -302,25 +360,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Metaspace_Committed{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Metaspace_Committed{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", 
"scale": "LINEAR" } } - }, - "width": 4, - "xPos": 4, - "yPos": 12 + } }, { - "height": 4, + "xPos": 32, + "yPos": 84, + "width": 16, + "height": 16, "widget": { "title": "Metaspace Max (bytes)", "xyChart": { @@ -329,25 +393,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Metaspace_Max{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Metaspace_Max{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 8, - "yPos": 12 + } }, { - "height": 4, + "yPos": 100, + "width": 24, + "height": 16, "widget": { "title": "Direct Used (bytes)", "xyChart": { @@ -356,25 +425,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Direct_MemoryUsed{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Direct_MemoryUsed{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 16 + } }, { - "height": 4, + "xPos": 24, + "yPos": 100, + "width": 24, + "height": 16, "widget": { "title": "Direct Total Capacity (bytes)", "xyChart": { @@ -383,25 +458,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Direct_TotalCapacity{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Direct_TotalCapacity{${Cluster},${Location},${Namespace}}", + 
"unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 16 + } }, { - "height": 4, + "yPos": 116, + "width": 24, + "height": 16, "widget": { "title": "Mapped Used (bytes)", "xyChart": { @@ -410,25 +490,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Mapped_MemoryUsed{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Mapped_MemoryUsed{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 20 + } }, { - "height": 4, + "xPos": 24, + "yPos": 116, + "width": 24, + "height": 16, "widget": { "title": "Mapped Total Capacity (bytes)", "xyChart": { @@ -437,25 +523,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Mapped_TotalCapacity{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_Memory_Mapped_TotalCapacity{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 20 + } }, { - "height": 4, + "yPos": 136, + "width": 24, + "height": 16, "widget": { "title": "Garbage Collector Time", "xyChart": { @@ -464,32 +555,42 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_GarbageCollector_G1_Old_Generation_Time{${Cluster},${Location},${Namespace}}" + 
"prometheusQuery": "flink_taskmanager_Status_JVM_GarbageCollector_G1_Old_Generation_Time{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } }, { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_GarbageCollector_G1_Young_Generation_Time{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_GarbageCollector_G1_Young_Generation_Time{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 24 + } }, { - "height": 4, + "xPos": 24, + "yPos": 136, + "width": 24, + "height": 16, "widget": { "title": "Garbage Collector Count", "xyChart": { @@ -498,32 +599,41 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_GarbageCollector_G1_Old_Generation_Count{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_GarbageCollector_G1_Old_Generation_Count{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } }, { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_GarbageCollector_G1_Young_Generation_Count{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_GarbageCollector_G1_Young_Generation_Count{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 24 + } }, { - "height": 4, + "yPos": 8, + "width": 24, + "height": 16, "widget": { "title": "Managed Memory Used", "xyChart": { @@ -532,25 +642,31 @@ 
}, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_Flink_Memory_Managed_Used{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_Flink_Memory_Managed_Used{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 0, - "yPos": 28 + } }, { - "height": 4, + "xPos": 24, + "yPos": 8, + "width": 24, + "height": 16, "widget": { "title": "Managed Memory Total", "xyChart": { @@ -559,25 +675,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_Flink_Memory_Managed_Total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_Flink_Memory_Managed_Total{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 4, - "yPos": 28 + } }, { - "height": 4, + "yPos": 152, + "width": 48, + "height": 16, "widget": { "title": "Class Loader", "xyChart": { @@ -586,30 +707,406 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_ClassLoader_ClassesLoaded{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "flink_taskmanager_Status_JVM_ClassLoader_ClassesLoaded{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } }, { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "flink_taskmanager_Status_JVM_ClassLoader_ClassesUnloaded{${Cluster},${Location},${Namespace}}" + 
"prometheusQuery": "flink_taskmanager_Status_JVM_ClassLoader_ClassesUnloaded{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 8, - "yPos": 28 + } + }, + { + "yPos": 24, + "width": 48, + "height": 4, + "widget": { + "title": "Java Virtual Machine", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 28, + "width": 48, + "height": 4, + "widget": { + "title": "Processing", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_MEDIUM", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 48, + "width": 48, + "height": 4, + "widget": { + "title": "Memory", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_MEDIUM", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 132, + "width": 48, + "height": 4, + "widget": { + "title": "Java Runtime Environment Components", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_MEDIUM", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 168, + "width": 48, + "height": 4, + 
"widget": { + "title": "Netty Shuffle", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 172, + "width": 16, + "height": 16, + "widget": { + "title": "Available Memory (bytes)", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "flink_taskmanager_Status_Shuffle_Netty_AvailableMemory{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "yPos": 188, + "width": 16, + "height": 16, + "widget": { + "title": "Available Memory Segments", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "flink_taskmanager_Status_Shuffle_Netty_AvailableMemorySegments{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 16, + "yPos": 172, + "width": 16, + "height": 16, + "widget": { + "title": "Used Memory (bytes)", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "flink_taskmanager_Status_Shuffle_Netty_UsedMemory{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + 
"timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 32, + "yPos": 172, + "width": 16, + "height": 16, + "widget": { + "title": "Total Memory (bytes)", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "flink_taskmanager_Status_Shuffle_Netty_TotalMemory{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 16, + "yPos": 188, + "width": 16, + "height": 16, + "widget": { + "title": "Used Memory Segments", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "flink_taskmanager_Status_Shuffle_Netty_UsedMemorySegments{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 32, + "yPos": 188, + "width": 16, + "height": 16, + "widget": { + "title": "Total Memory Segments", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "flink_taskmanager_Status_Shuffle_Netty_TotalMemorySegments{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "width": 12, + "height": 8, + "widget": { + "title": "Managed Memory Utilization %", + "scorecard": { + "blankView": {}, + "thresholds": [], + 
"timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "(sum(flink_taskmanager_Status_Flink_Memory_Managed_Used{${Cluster},${Location},${Namespace}}) / sum(flink_taskmanager_Status_Flink_Memory_Managed_Total{${Cluster},${Location},${Namespace}})) * 100", + "unitOverride": "" + } + } + } + }, + { + "xPos": 12, + "width": 12, + "height": 8, + "widget": { + "title": "JVM Heap Utilization %", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "(sum(flink_taskmanager_Status_JVM_Memory_Heap_Used{${Cluster},${Location},${Namespace}}) / sum(flink_taskmanager_Status_JVM_Memory_Heap_Max{${Cluster},${Location},${Namespace}})) * 100", + "unitOverride": "" + } + } + } + }, + { + "xPos": 24, + "width": 12, + "height": 8, + "widget": { + "title": "JVM Nonheap Utilization %", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "(sum(flink_taskmanager_Status_JVM_Memory_NonHeap_Used{${Cluster},${Location},${Namespace}}) / sum(flink_taskmanager_Status_JVM_Memory_NonHeap_Max{${Cluster},${Location},${Namespace}})) * 100", + "unitOverride": "" + } + } + } + }, + { + "xPos": 36, + "width": 12, + "height": 8, + "widget": { + "title": "JVM Metaspace Utilization %", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "(sum(flink_taskmanager_Status_JVM_Memory_Metaspace_Used{${Cluster},${Location},${Namespace}}) / sum(flink_taskmanager_Status_JVM_Memory_Metaspace_Max{${Cluster},${Location},${Namespace}})) * 100", + "unitOverride": "" + } + } + } } ] - } + }, + "labels": {} } diff --git a/dashboards/flink/metadata.yaml b/dashboards/flink/metadata.yaml index a67c0d65ba..49b9fc6e36 100644 --- a/dashboards/flink/metadata.yaml +++ b/dashboards/flink/metadata.yaml @@ -23,7 +23,7 @@ sample_dashboards: id: flink-job-manager-prometheus display_name: 
Flink Job Manager Prometheus Overview description: |- - This dashboard is based on prometheus metrics exposed by [Apache Flink](https://nightlies.apache.org/flink/flink-docs-release-1.7/monitoring/metrics.html#prometheus-orgapacheflinkmetricsprometheusprometheusreporter). This dashboard has charts displaying: Registered Task Managers, Running Jobs, Task Slots Available, Task Slots Total, CPU Load, CPU Time, Threads, Heap Used (bytes), Heap Committed (bytes), Heap Max (bytes), Nonheap Used (bytes), Nonheap Committed (bytes), Nonheap Max (bytes), Metaspace Used (bytes), Metaspace Committed (bytes), Metaspace Max (bytes), Direct Used (bytes), Direct Total Capacity (bytes), Mapped Used (bytes), Mapped Total Capacity (bytes), Garbage Collection Time, Garbage Collection Count, and Class Loader. + This dashboard is based on prometheus metrics exposed by [Apache Flink](https://nightlies.apache.org/flink/flink-docs-release-1.7/monitoring/metrics.html#prometheus-orgapacheflinkmetricsprometheusprometheusreporter). This dashboard has charts displaying: Registered Task Managers, Running Jobs, Task Slots Available, Task Slots Total, JVM CPU Load, JVM CPU Time, JVM Threads, JVM Heap Used (bytes), JVM Heap Committed (bytes), JVM Heap Max (bytes), JVM Nonheap Used (bytes), JVM Nonheap Committed (bytes), JVM Nonheap Max (bytes), JVM Metaspace Used (bytes), JVM Metaspace Committed (bytes), JVM Metaspace Max (bytes), JVM Direct Used (bytes), JVM Direct Total Capacity (bytes), JVM Mapped Used (bytes), JVM Mapped Total Capacity (bytes), JVM Garbage Collector Time, JVM Garbage Collector Count, and JVM Class Loader. 
related_integrations: - id: flink platform: GKE @@ -32,7 +32,7 @@ sample_dashboards: id: flink-task-manager-prometheus display_name: Flink Task Manager Prometheus Overview description: |- - This dashboard is based on prometheus metrics exposed by [Apache Flink](https://nightlies.apache.org/flink/flink-docs-release-1.7/monitoring/metrics.html#prometheus-orgapacheflinkmetricsprometheusprometheusreporter). This dashboard has charts displaying: CPU Load, CPU Time, Threads, Heap Used (bytes), Heap Committed (bytes), Heap Max (bytes), Nonheap Used (bytes), Nonheap Committed (bytes), Nonheap Max (bytes), Metaspace Used (bytes), Metaspace Committed (bytes), Metaspace Max (bytes), Direct Used (bytes), Direct Total Capacity (bytes), Mapped Used (bytes), Mapped Total Capacity (bytes), Garbage Collection Time, Garbage Collection Count, Managed Memory Used, Managed Memory Total, and Class Loader. + This dashboard is based on prometheus metrics exposed by [Apache Flink](https://nightlies.apache.org/flink/flink-docs-release-1.7/monitoring/metrics.html#prometheus-orgapacheflinkmetricsprometheusprometheusreporter). 
This dashboard has charts displaying: Managed Memory Utilization %, JVM Heap Utilization %, JVM Nonheap Utilization %, JVM Metaspace Utilization %, Managed Memory Used, Managed Memory Total, JVM CPU Load, JVM CPU Time, JVM Threads, JVM Heap Used (bytes), JVM Heap Committed (bytes), JVM Heap Max (bytes), JVM Nonheap Used (bytes), JVM Nonheap Committed (bytes), JVM Nonheap Max (bytes), JVM Metaspace Used (bytes), JVM Metaspace Committed (bytes), JVM Metaspace Max (bytes), JVM Direct Used (bytes), JVM Direct Total Capacity (bytes), JVM Mapped Used (bytes), JVM Mapped Total Capacity (bytes), JVM Garbage Collector Time, JVM Garbage Collector Count, JVM Class Loader, Netty Shuffle Available Memory (bytes), Netty Shuffle Used Memory (bytes), Netty Shuffle Total Memory (bytes), Netty Shuffle Available Memory Segments, Netty Shuffle Used Memory Segments, and Netty Shuffle Total Memory Segments. related_integrations: - id: flink platform: GKE diff --git a/dashboards/google-backupdr/README.md b/dashboards/google-backupdr/README.md index a9f077c7bc..ddc1f5a2ba 100644 --- a/dashboards/google-backupdr/README.md +++ b/dashboards/google-backupdr/README.md @@ -3,4 +3,4 @@ |BackupDr Monitoring| |:---------------------| |Filename: [backupdr-monitoring.json](backupdr-monitoring.json)| -|This dashboard has 5 charts for the related [Backup DR metrics], including metrics for storage pool utilization,vdisk count utilization,OnVault pool Utilization along with Protected Data volume and count.| +|This dashboard has 4 charts for the related [Backup DR metrics], including metrics for storage pool utilization, vdisk count utilization, OnVault pool Utilization along with Protected Data volume.| diff --git a/dashboards/google-backupdr/backupdr-monitoring.json b/dashboards/google-backupdr/backupdr-monitoring.json index 37408e1020..ff4e8b20f2 100644 --- a/dashboards/google-backupdr/backupdr-monitoring.json +++ b/dashboards/google-backupdr/backupdr-monitoring.json @@ -10,7 +10,7 @@ { 
"timeSeriesQuery": { "timeSeriesFilter": { - "filter": "metric.type=\"staging-backupdr.sandbox.googleapis.com/backup_recovery_appliance/storage_pool/utilization\" resource.type=\"staging-backupdr.sandbox.googleapis.com/ManagementConsole\"", + "filter": "metric.type=\"backupdr.googleapis.com/backup_recovery_appliance/storage_pool/utilization\" resource.type=\"backupdr.googleapis.com/ManagementConsole\"", "aggregation": { "perSeriesAligner": "ALIGN_MEAN", "groupByFields": [] @@ -40,7 +40,7 @@ { "timeSeriesQuery": { "timeSeriesFilter": { - "filter": "metric.type=\"staging-backupdr.sandbox.googleapis.com/backup_recovery_appliance/v_disk_count/utilization\" resource.type=\"staging-backupdr.sandbox.googleapis.com/ManagementConsole\"", + "filter": "metric.type=\"backupdr.googleapis.com/backup_recovery_appliance/v_disk_count/utilization\" resource.type=\"backupdr.googleapis.com/ManagementConsole\"", "aggregation": { "perSeriesAligner": "ALIGN_MEAN", "groupByFields": [] @@ -70,7 +70,7 @@ { "timeSeriesQuery": { "timeSeriesFilter": { - "filter": "metric.type=\"staging-backupdr.sandbox.googleapis.com/onvault_pools/utilization\" resource.type=\"staging-backupdr.sandbox.googleapis.com/ManagementConsole\"", + "filter": "metric.type=\"backupdr.googleapis.com/onvault_pools/utilization\" resource.type=\"backupdr.googleapis.com/ManagementConsole\"", "aggregation": { "perSeriesAligner": "ALIGN_MEAN", "groupByFields": [] @@ -100,7 +100,7 @@ { "timeSeriesQuery": { "timeSeriesFilter": { - "filter": "metric.type=\"staging-backupdr.sandbox.googleapis.com/protected_data/volume\" resource.type=\"staging-backupdr.sandbox.googleapis.com/ManagementConsole\"", + "filter": "metric.type=\"backupdr.googleapis.com/protected_data/volume\" resource.type=\"backupdr.googleapis.com/ManagementConsole\"", "aggregation": { "perSeriesAligner": "ALIGN_MEAN", "groupByFields": [] @@ -121,36 +121,6 @@ }, "thresholds": [] } - }, - { - "title": "Protected Data Resources Count", - "xyChart": { - "dataSets": [ - { - 
"timeSeriesQuery": { - "timeSeriesFilter": { - "filter": "metric.type=\"staging-backupdr.sandbox.googleapis.com/protected_data/resources\" resource.type=\"staging-backupdr.sandbox.googleapis.com/ManagementConsole\"", - "aggregation": { - "perSeriesAligner": "ALIGN_MEAN", - "groupByFields": [] - } - } - }, - "plotType": "LINE", - "minAlignmentPeriod": "60s", - "targetAxis": "Y1", - "legendTemplate": "" - } - ], - "chartOptions": { - "mode": "COLOR", - "displayHorizontal": false - }, - "yAxis": { - "scale": "LINEAR" - }, - "thresholds": [] - } } ] } diff --git a/dashboards/google-kubernetes-engine/gke-dpv2-observability-cluster-flows-overview.json b/dashboards/google-kubernetes-engine/gke-dpv2-observability-cluster-flows-overview.json index 6fd8d5a82b..031e5b19bf 100644 --- a/dashboards/google-kubernetes-engine/gke-dpv2-observability-cluster-flows-overview.json +++ b/dashboards/google-kubernetes-engine/gke-dpv2-observability-cluster-flows-overview.json @@ -1,12 +1,10 @@ { "displayName": "GKE DPv2 Observability - Cluster Flows Overview", "dashboardFilters": [], - "category": "CUSTOM", "mosaicLayout": { "columns": 48, "tiles": [ { - "xPos": 0, "yPos": 26, "width": 24, "height": 16, @@ -18,11 +16,13 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "minAlignmentPeriod": "60s", "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "apiSource": "DEFAULT_CLOUD", "timeSeriesFilter": { "aggregation": { "alignmentPeriod": "60s", @@ -46,7 +46,6 @@ } }, { - "xPos": 0, "yPos": 6, "width": 24, "height": 4, @@ -90,7 +89,6 @@ } }, { - "xPos": 0, "yPos": 42, "width": 24, "height": 16, @@ -109,7 +107,6 @@ "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "apiSource": "DEFAULT_CLOUD", "timeSeriesFilter": { "aggregation": { "alignmentPeriod": "60s", @@ -133,7 +130,6 @@ } }, { - "xPos": 0, "yPos": 10, "width": 24, "height": 16, @@ -145,11 +141,13 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + 
"measures": [], "minAlignmentPeriod": "60s", "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "apiSource": "DEFAULT_CLOUD", "timeSeriesFilter": { "aggregation": { "alignmentPeriod": "60s", @@ -185,11 +183,13 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "minAlignmentPeriod": "60s", "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "apiSource": "DEFAULT_CLOUD", "timeSeriesFilter": { "aggregation": { "alignmentPeriod": "60s", @@ -225,11 +225,13 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "minAlignmentPeriod": "60s", "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "apiSource": "DEFAULT_CLOUD", "timeSeriesFilter": { "aggregation": { "alignmentPeriod": "60s", @@ -272,7 +274,6 @@ "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "apiSource": "DEFAULT_CLOUD", "timeSeriesFilter": { "aggregation": { "alignmentPeriod": "60s", @@ -296,8 +297,6 @@ } }, { - "xPos": 0, - "yPos": 0, "width": 48, "height": 6, "widget": { diff --git a/dashboards/google-kubernetes-engine/gke-dpv2-observability-cluster-flows-overview.png b/dashboards/google-kubernetes-engine/gke-dpv2-observability-cluster-flows-overview.png index 8ab9519855..05e912f1b0 100644 Binary files a/dashboards/google-kubernetes-engine/gke-dpv2-observability-cluster-flows-overview.png and b/dashboards/google-kubernetes-engine/gke-dpv2-observability-cluster-flows-overview.png differ diff --git a/dashboards/google-kubernetes-engine/gke-dpv2-observability-drilldown-view.json b/dashboards/google-kubernetes-engine/gke-dpv2-observability-drilldown-view.json index ca9edcb841..5bbabfd377 100644 --- a/dashboards/google-kubernetes-engine/gke-dpv2-observability-drilldown-view.json +++ b/dashboards/google-kubernetes-engine/gke-dpv2-observability-drilldown-view.json @@ -1,12 +1,10 @@ { "displayName": "GKE DPv2 Observability - Drilldown View", "dashboardFilters": [], - "category": 
"CUSTOM", "mosaicLayout": { "columns": 48, "tiles": [ { - "xPos": 0, "yPos": 52, "width": 24, "height": 16, @@ -25,7 +23,6 @@ "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "apiSource": "DEFAULT_CLOUD", "timeSeriesFilter": { "aggregation": { "alignmentPeriod": "60s", @@ -73,7 +70,6 @@ "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "apiSource": "DEFAULT_CLOUD", "timeSeriesFilter": { "aggregation": { "alignmentPeriod": "60s", @@ -102,7 +98,6 @@ } }, { - "xPos": 0, "yPos": 16, "width": 24, "height": 4, @@ -146,7 +141,6 @@ } }, { - "xPos": 0, "yPos": 20, "width": 24, "height": 16, @@ -158,15 +152,16 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "minAlignmentPeriod": "60s", "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "apiSource": "DEFAULT_CLOUD", "timeSeriesFilter": { "aggregation": { "alignmentPeriod": "60s", - "crossSeriesReducer": "REDUCE_NONE", "perSeriesAligner": "ALIGN_RATE" }, "filter": "metric.type=\"prometheus.googleapis.com/pod_flow_egress_flows_count/counter\" resource.type=\"prometheus_target\"", @@ -200,11 +195,13 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "minAlignmentPeriod": "60s", "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "apiSource": "DEFAULT_CLOUD", "timeSeriesFilter": { "aggregation": { "alignmentPeriod": "60s", @@ -238,8 +235,6 @@ } }, { - "xPos": 0, - "yPos": 0, "width": 48, "height": 10, "widget": { @@ -260,7 +255,6 @@ } }, { - "xPos": 0, "yPos": 10, "width": 48, "height": 6, @@ -282,7 +276,6 @@ } }, { - "xPos": 0, "yPos": 36, "width": 24, "height": 16, @@ -294,15 +287,16 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "minAlignmentPeriod": "60s", "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "apiSource": "DEFAULT_CLOUD", "timeSeriesFilter": { "aggregation": { "alignmentPeriod": "60s", - "crossSeriesReducer": 
"REDUCE_NONE", "perSeriesAligner": "ALIGN_RATE" }, "filter": "metric.type=\"prometheus.googleapis.com/pod_flow_egress_flows_count/counter\" resource.type=\"prometheus_target\" metric.label.\"verdict\"=\"FORWARDED\"", @@ -340,25 +334,15 @@ "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "apiSource": "DEFAULT_CLOUD", "timeSeriesFilter": { "aggregation": { "alignmentPeriod": "60s", - "crossSeriesReducer": "REDUCE_SUM", - "groupByFields": [ - "metric.label.\"remote_workload\"", - "metric.label.\"remote_namespace\"", - "metric.label.\"pod_name\"", - "metric.label.\"remote_class\"", - "metric.label.\"namespace_name\"", - "metric.label.\"workload_name\"" - ], "perSeriesAligner": "ALIGN_RATE" }, - "filter": "metric.type=\"prometheus.googleapis.com/pod_flow_egress_flows_count/counter\" resource.type=\"prometheus_target\" metric.label.\"verdict\"=\"FORWARDED\"", + "filter": "metric.type=\"prometheus.googleapis.com/pod_flow_ingress_flows_count/counter\" resource.type=\"prometheus_target\" metric.label.\"verdict\"=\"FORWARDED\"", "pickTimeSeriesFilter": { "direction": "TOP", - "numTimeSeries": 10, + "numTimeSeries": 20, "rankingMethod": "METHOD_MEAN" } } diff --git a/dashboards/google-kubernetes-engine/gke-dpv2-observability-drilldown-view.png b/dashboards/google-kubernetes-engine/gke-dpv2-observability-drilldown-view.png index 3e5baafb51..1d2e57cb97 100644 Binary files a/dashboards/google-kubernetes-engine/gke-dpv2-observability-drilldown-view.png and b/dashboards/google-kubernetes-engine/gke-dpv2-observability-drilldown-view.png differ diff --git a/dashboards/hadoop/README.md b/dashboards/hadoop/README.md index 3a867e79c8..9fc64a41a2 100644 --- a/dashboards/hadoop/README.md +++ b/dashboards/hadoop/README.md @@ -13,4 +13,4 @@ |Hadoop Prometheus Overview| |:------------------| |Filename: [hadoop-prometheus-overview.json](hadoop-prometheus-overview.json)| -|This dashboard is based on prometheus metrics exposed by an 
[exporter](https://github.com/prometheus/jmx_exporter), including `Data Nodes`, `Volume Failures`, `Capacity Used`, `Capacity Limit`, `Block Count`, `Corrupt Blocks`, `Missing Blocks`, `File Count`, and `File Load`.| +|This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/prometheus/jmx_exporter), including `Open Connections`, `Active Clients`, `Active Sources`, `Active Sinks`, `Block Pool Utilization %`, `Threads`, `Active Data Nodes`, `Dead Data Nodes`, `Stale Data Nodes`, `Capacity Used`, `Capacity Limit`, `File Count`, `File Load`, `Heap Memory (mb)`, `Nonheap Memory (mb)`, `Block Count`, `Block Capacity`, `Missing Blocks`, `Corrupt Blocks`, `Under-replicated Blocks`, `Excess Blocks`, `Volume Failures`, `Dropped Connections`, `Error Logs`, and `Fatal Logs`.| diff --git a/dashboards/hadoop/hadoop-prometheus-overview.01.png b/dashboards/hadoop/hadoop-prometheus-overview.01.png index 49e459035a..cde9928860 100644 Binary files a/dashboards/hadoop/hadoop-prometheus-overview.01.png and b/dashboards/hadoop/hadoop-prometheus-overview.01.png differ diff --git a/dashboards/hadoop/hadoop-prometheus-overview.02.png b/dashboards/hadoop/hadoop-prometheus-overview.02.png index 326816522f..7293e8ed7d 100644 Binary files a/dashboards/hadoop/hadoop-prometheus-overview.02.png and b/dashboards/hadoop/hadoop-prometheus-overview.02.png differ diff --git a/dashboards/hadoop/hadoop-prometheus-overview.json b/dashboards/hadoop/hadoop-prometheus-overview.json index 7067ae044e..91e7c90e56 100644 --- a/dashboards/hadoop/hadoop-prometheus-overview.json +++ b/dashboards/hadoop/hadoop-prometheus-overview.json @@ -1,5 +1,5 @@ { - "category": "CUSTOM", + "displayName": "Hadoop Prometheus Overview", "dashboardFilters": [ { "filterType": "RESOURCE_LABEL", @@ -17,40 +17,45 @@ "templateVariable": "Namespace" } ], - "displayName": "Hadoop Prometheus Overview", - "labels": {}, "mosaicLayout": { - "columns": 12, + "columns": 48, "tiles": [ { - "height": 4, + 
"yPos": 12, + "width": 16, + "height": 16, "widget": { - "title": "Data Nodes", + "title": "Active Data Nodes", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "hadoop_namenode_numlivedatanodes{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "hadoop_namenode_numlivedatanodes{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 0 + } }, { - "height": 4, + "yPos": 120, + "width": 24, + "height": 16, "widget": { "title": "Volume Failures", "xyChart": { @@ -59,25 +64,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "hadoop_namenode_volumefailurestotal{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "hadoop_namenode_volumefailurestotal{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 0 + } }, { - "height": 4, + "yPos": 32, + "width": 24, + "height": 16, "widget": { "title": "Capacity Used (bytes)", "xyChart": { @@ -89,22 +99,25 @@ "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "hadoop_namenode_capacityused" + "prometheusQuery": "hadoop_namenode_capacityused{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 4 + } }, { - "height": 4, + "xPos": 24, + "yPos": 32, + "width": 24, + "height": 16, "widget": { "title": "Capacity Limit (bytes)", "xyChart": { @@ -113,25 +126,30 @@ }, "dataSets": [ { + 
"breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "hadoop_namenode_capacitytotal{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "hadoop_namenode_capacitytotal{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 4 + } }, { - "height": 4, + "yPos": 84, + "width": 16, + "height": 16, "widget": { "title": "Block Count", "xyChart": { @@ -140,25 +158,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "hadoop_namenode_totalblocks{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "hadoop_namenode_totalblocks{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 0, - "yPos": 8 + } }, { - "height": 4, + "yPos": 100, + "width": 16, + "height": 16, "widget": { "title": "Corrupt Blocks", "xyChart": { @@ -167,25 +190,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "hadoop_namenode_corruptblocks{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "hadoop_namenode_corruptblocks{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 4, - "yPos": 8 + } }, { - "height": 4, + "xPos": 32, + "yPos": 84, + "width": 16, + "height": 16, "widget": { "title": "Missing Blocks", "xyChart": { @@ -194,25 +223,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", 
"timeSeriesQuery": { - "prometheusQuery": "hadoop_namenode_missingblocks{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "hadoop_namenode_missingblocks{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 4, - "xPos": 8, - "yPos": 8 + } }, { - "height": 4, + "yPos": 48, + "width": 24, + "height": 16, "widget": { "title": "File Count", "xyChart": { @@ -221,25 +255,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "hadoop_namenode_filestotal{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "hadoop_namenode_filestotal{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 12 + } }, { - "height": 4, + "xPos": 24, + "yPos": 48, + "width": 24, + "height": 16, "widget": { "title": "File Load", "xyChart": { @@ -248,23 +288,578 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "hadoop_namenode_totalfileops{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 16, + "yPos": 100, + "width": 16, + "height": 16, + "widget": { + "title": "Under-replicated Blocks", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "hadoop_namenode_underreplicatedblocks{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + 
"timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 16, + "yPos": 12, + "width": 16, + "height": 16, + "widget": { + "title": "Dead Data Nodes", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "hadoop_namenode_numdeaddatanodes{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 16, + "yPos": 84, + "width": 16, + "height": 16, + "widget": { + "title": "Block Capacity", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "hadoop_namenode_blockcapacity{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "yPos": 136, + "width": 24, + "height": 16, + "widget": { + "title": "Error Logs", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "hadoop_namenode_logerror{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 24, + "yPos": 136, + "width": 24, + "height": 16, + "widget": { + "title": "Fatal Logs", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + 
"prometheusQuery": "hadoop_namenode_logfatal{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "yPos": 80, + "width": 48, + "height": 4, + "widget": { + "title": "Blocks", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#E7EFFE", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 28, + "width": 48, + "height": 4, + "widget": { + "title": "Utilization", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#E7EFFE", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 116, + "width": 48, + "height": 4, + "widget": { + "title": "Errors", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#E7EFFE", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 64, + "width": 24, + "height": 16, + "widget": { + "title": "Heap Memory (mb)", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "hadoop_namenode_memheapcommittedm{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + }, + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": 
"hadoop_namenode_memheapusedm{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + }, + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "hadoop_namenode_memheapmaxm{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 24, + "yPos": 64, + "width": 24, + "height": 16, + "widget": { + "title": "Nonheap Memory (mb)", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "hadoop_namenode_memnonheapcommittedm{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + }, + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "hadoop_namenode_memnonheapusedm{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + }, + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "hadoop_namenode_memnonheapmaxm{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 8, + "width": 8, + "height": 8, + "widget": { + "title": "Active Clients", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(hadoop_namenode_numactiveclients{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 16, + "width": 8, + "height": 8, + "widget": { + "title": "Active Sources", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": 
true, + "prometheusQuery": "sum(hadoop_namenode_numactivesources{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 24, + "width": 8, + "height": 8, + "widget": { + "title": "Active Sinks", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(hadoop_namenode_numactivesinks{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 24, + "yPos": 120, + "width": 24, + "height": 16, + "widget": { + "title": "Dropped Connections", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "hadoop_namenode_numdroppedconnections{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "width": 8, + "height": 8, + "widget": { + "title": "Open Connections", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "prometheusQuery": "sum(hadoop_namenode_numopenconnections{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 32, + "yPos": 12, + "width": 16, + "height": 16, + "widget": { + "title": "Stale Data Nodes", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "hadoop_namenode_totalfileops{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "hadoop_namenode_numstaledatanodes{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 12 + } + }, + { + "xPos": 32, + "width": 
8, + "height": 8, + "widget": { + "title": "Block Pool Utilization %", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "max(hadoop_namenode_percentblockpoolused{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 40, + "width": 8, + "height": 8, + "widget": { + "title": "Threads", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "max(hadoop_namenode_threads{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 32, + "yPos": 100, + "width": 16, + "height": 16, + "widget": { + "title": "Excess Blocks", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "hadoop_namenode_excessblocks{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "yPos": 8, + "width": 48, + "height": 4, + "widget": { + "title": "Data Nodes", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#E7EFFE", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } } ] - } + }, + "labels": {} } diff --git a/dashboards/hadoop/metadata.yaml b/dashboards/hadoop/metadata.yaml index 07e238794b..9f9f9c76ba 100644 --- a/dashboards/hadoop/metadata.yaml +++ b/dashboards/hadoop/metadata.yaml @@ -11,7 +11,7 @@ sample_dashboards: category: Hadoop id: hadoop-prometheus-overview display_name: Hadoop Prometheus Overview - description: "This dashboard is based on prometheus metrics 
exposed by an [exporter](https://github.com/prometheus/jmx_exporter), including Data Nodes, Volume Failures, Capacity Used, Capacity Limit, Block Count, Corrupt Blocks, Missing Blocks, File Count, and File Load." + description: "This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/prometheus/jmx_exporter), including Open Connections, Active Clients, Active Sources, Active Sinks, Block Pool Utilization %, Threads, Active Data Nodes, Dead Data Nodes, Stale Data Nodes, Capacity Used, Capacity Limit, File Count, File Load, Heap Memory (mb), Nonheap Memory (mb), Block Count, Block Capacity, Missing Blocks, Corrupt Blocks, Under-replicated Blocks, Excess Blocks, Volume Failures, Dropped Connections, Error Logs, and Fatal Logs." related_integrations: - id: hadoop platform: GKE diff --git a/dashboards/haproxy/README.md b/dashboards/haproxy/README.md index 0e43d87314..21689d1bef 100644 --- a/dashboards/haproxy/README.md +++ b/dashboards/haproxy/README.md @@ -7,4 +7,4 @@ |HAProxy Prometheus| |:------------------| |Filename: [haproxy-prometheus.json](haproxy-prometheus.json)| -|This dashboard has charts displaying: `Frontend HTTP Reponses`, `Backend HTTP Responses`, `Frontend HTTP Bytes In`, `Frontend HTTP Bytes Out`, `Frontend Connections`, `Backend HTTP Bytes In`, `Backend HTTP Bytes Out`, `Backend Connection Errors Rate`, `Frontend Action Sessions`, `Backend Active Sessions`, `Frontend HTTP Request Rate`, `Backend HTTP Response Rate`, `Frontend Error Rate`, `Backend Response Errors Rate`, `Frontend Denied Request Rate`, and `Backend Retry Warnings Rate`| +|This dashboard has charts displaying: `Frontend HTTP Responses`, `Backend HTTP Responses`, `Frontend HTTP Bytes In`, `Frontend HTTP Bytes Out`, `Frontend Connections`, `Backend HTTP Bytes In`, `Backend HTTP Bytes Out`, `Backend Connection Errors Rate`, `Frontend Active Sessions`, `Backend Active Sessions`, `Frontend HTTP Request Rate`, `Backend HTTP Response Rate`, `Frontend Error
Rate`, `Backend Response Errors Rate`, `Frontend Denied Request Rate`, `Average Response Time Seconds`, `Queued Connections`, and `Backend Retry Warnings Rate`| diff --git a/dashboards/haproxy/haproxy-prometheus.01.png b/dashboards/haproxy/haproxy-prometheus.01.png index 84d0195422..b47034a440 100644 Binary files a/dashboards/haproxy/haproxy-prometheus.01.png and b/dashboards/haproxy/haproxy-prometheus.01.png differ diff --git a/dashboards/haproxy/haproxy-prometheus.02.png b/dashboards/haproxy/haproxy-prometheus.02.png index 27341c837a..b1fa55d462 100644 Binary files a/dashboards/haproxy/haproxy-prometheus.02.png and b/dashboards/haproxy/haproxy-prometheus.02.png differ diff --git a/dashboards/haproxy/haproxy-prometheus.03.png b/dashboards/haproxy/haproxy-prometheus.03.png deleted file mode 100644 index 0e787b7982..0000000000 Binary files a/dashboards/haproxy/haproxy-prometheus.03.png and /dev/null differ diff --git a/dashboards/haproxy/haproxy-prometheus.json b/dashboards/haproxy/haproxy-prometheus.json index 96decab22a..c5a51cb4bd 100644 --- a/dashboards/haproxy/haproxy-prometheus.json +++ b/dashboards/haproxy/haproxy-prometheus.json @@ -1,5 +1,5 @@ { - "category": "CUSTOM", + "displayName": "HAProxy Prometheus Overview", "dashboardFilters": [ { "filterType": "RESOURCE_LABEL", @@ -17,13 +17,13 @@ "templateVariable": "Namespace" } ], - "displayName": "HAProxy Prometheus Overview", - "labels": {}, "mosaicLayout": { - "columns": 12, + "columns": 48, "tiles": [ { - "height": 3, + "yPos": 12, + "width": 24, + "height": 8, "widget": { "title": "Frontend HTTP Responses", "xyChart": { @@ -32,25 +32,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "haproxy_frontend_http_responses_total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "rate(haproxy_frontend_http_responses_total{${Cluster},${Location},${Namespace}}[1m])", + "unitOverride": "" } } ], 
+ "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 0 + } }, { - "height": 3, + "xPos": 24, + "yPos": 12, + "width": 24, + "height": 8, "widget": { "title": "Backend HTTP Responses", "xyChart": { @@ -59,25 +65,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "haproxy_backend_http_responses_total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "rate(haproxy_backend_http_responses_total{${Cluster},${Location},${Namespace}}[1m])", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 0 + } }, { - "height": 3, + "yPos": 20, + "width": 24, + "height": 8, "widget": { "title": "Frontend HTTP Bytes In", "xyChart": { @@ -86,25 +97,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "haproxy_frontend_bytes_in_total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "rate(haproxy_frontend_bytes_in_total{${Cluster},${Location},${Namespace}}[1m])", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 3, - "xPos": 0, - "yPos": 3 + } }, { - "height": 3, + "yPos": 28, + "width": 24, + "height": 8, "widget": { "title": "Frontend HTTP Bytes Out", "xyChart": { @@ -113,25 +129,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "haproxy_frontend_bytes_out_total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "rate(haproxy_frontend_bytes_out_total{${Cluster},${Location},${Namespace}}[1m])", + "unitOverride": "" } } ], + "thresholds": [], 
"timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 3, - "xPos": 3, - "yPos": 3 + } }, { - "height": 3, + "xPos": 24, + "yPos": 20, + "width": 24, + "height": 8, "widget": { "title": "Backend HTTP Bytes In", "xyChart": { @@ -140,25 +162,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "haproxy_backend_bytes_in_total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "rate(haproxy_backend_bytes_in_total{${Cluster},${Location},${Namespace}}[1m])", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 3, - "xPos": 6, - "yPos": 3 + } }, { - "height": 3, + "xPos": 24, + "yPos": 28, + "width": 24, + "height": 8, "widget": { "title": "Backend HTTP Bytes Out", "xyChart": { @@ -167,25 +195,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "haproxy_backend_bytes_out_total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "rate(haproxy_backend_bytes_out_total{${Cluster},${Location},${Namespace}}[1m])", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 3, - "xPos": 9, - "yPos": 3 + } }, { - "height": 3, + "yPos": 36, + "width": 24, + "height": 8, "widget": { "title": "Frontend Connections", "xyChart": { @@ -194,25 +227,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "haproxy_frontend_connections_total{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "rate(haproxy_frontend_connections_total{${Cluster},${Location},${Namespace}}[1m])", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": 
"0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 6 + } }, { - "height": 3, + "xPos": 24, + "yPos": 36, + "width": 24, + "height": 8, "widget": { "title": "Backend Connection Errors Rate", "xyChart": { @@ -221,25 +260,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "rate(haproxy_backend_connection_errors_total{${Cluster},${Location},${Namespace}}[10m])" + "prometheusQuery": "rate(haproxy_backend_connection_errors_total{${Cluster},${Location},${Namespace}}[10m])", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 6 + } }, { - "height": 3, + "yPos": 44, + "width": 24, + "height": 8, "widget": { "title": "Frontend Active Sessions", "xyChart": { @@ -248,25 +292,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "haproxy_frontend_current_sessions{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "haproxy_frontend_current_sessions{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 9 + } }, { - "height": 3, + "yPos": 52, + "width": 24, + "height": 8, "widget": { "title": "Frontend HTTP Request Rate", "xyChart": { @@ -275,25 +324,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "sum by (instance) (rate(haproxy_frontend_http_requests_total{${Cluster},${Location},${Namespace}}[10m]))" + "prometheusQuery": "sum by (instance) (rate(haproxy_frontend_http_requests_total{${Cluster},${Location},${Namespace}}[10m]))", + 
"unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 12 + } }, { - "height": 3, + "xPos": 24, + "yPos": 44, + "width": 24, + "height": 8, "widget": { "title": "Backend Active Sessions", "xyChart": { @@ -302,25 +357,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "haproxy_backend_current_sessions{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "haproxy_backend_current_sessions{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 9 + } }, { - "height": 3, + "yPos": 60, + "width": 24, + "height": 8, "widget": { "title": "Frontend Error Rate", "xyChart": { @@ -329,25 +389,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "sum by (instance) (rate(haproxy_frontend_request_errors_total{${Cluster},${Location},${Namespace}}[10m]))" + "prometheusQuery": "sum by (instance) (rate(haproxy_frontend_request_errors_total{${Cluster},${Location},${Namespace}}[10m]))", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 15 + } }, { - "height": 3, + "yPos": 68, + "width": 24, + "height": 8, "widget": { "title": "Frontend Denied Request Rate", "xyChart": { @@ -356,25 +421,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "sum by (instance) (rate(haproxy_frontend_requests_denied_total{${Cluster},${Location},${Namespace}}[10m]))" + "prometheusQuery": "sum by (instance) 
(rate(haproxy_frontend_requests_denied_total{${Cluster},${Location},${Namespace}}[10m]))", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 18 + } }, { - "height": 3, + "xPos": 24, + "yPos": 68, + "width": 24, + "height": 8, "widget": { "title": "Backend Retry Warnings", "xyChart": { @@ -383,25 +454,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "sum by (instance) (rate(haproxy_backend_retry_warnings_total{${Cluster},${Location},${Namespace}}[10m]))" + "prometheusQuery": "sum by (instance) (rate(haproxy_backend_retry_warnings_total{${Cluster},${Location},${Namespace}}[10m]))", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 18 + } }, { - "height": 3, + "xPos": 24, + "yPos": 52, + "width": 24, + "height": 8, "widget": { "title": "Backend HTTP Response Rate", "xyChart": { @@ -410,25 +487,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "sum by (instance) (rate(haproxy_backend_http_responses_total{${Cluster},${Location},${Namespace}}[10m]))" + "prometheusQuery": "sum by (instance) (rate(haproxy_backend_http_responses_total{${Cluster},${Location},${Namespace}}[10m]))", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 12 + } }, { - "height": 3, + "xPos": 24, + "yPos": 60, + "width": 24, + "height": 8, "widget": { "title": "Backend Response Errors", "xyChart": { @@ -437,23 +520,154 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", 
"timeSeriesQuery": { - "prometheusQuery": "sum by (instance) (rate(haproxy_backend_response_errors_total{${Cluster},${Location},${Namespace}}[10m]))" + "prometheusQuery": "sum by (instance) (rate(haproxy_backend_response_errors_total{${Cluster},${Location},${Namespace}}[10m]))", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 15 + } + }, + { + "yPos": 8, + "width": 24, + "height": 4, + "widget": { + "title": "Frontends", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "xPos": 24, + "yPos": 8, + "width": 24, + "height": 4, + "widget": { + "title": "Backends", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "width": 12, + "height": 8, + "widget": { + "title": "Frontend 2XX %", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "(sum(rate(haproxy_frontend_http_responses_total{${Cluster},${Location},${Namespace},code=\"2xx\"}[1m])) / sum(rate(haproxy_frontend_http_responses_total{${Cluster},${Location},${Namespace}}[1m]))) * 100", + "unitOverride": "" + } + } + } + }, + { + "xPos": 40, + "width": 8, + "height": 8, + "widget": { + "title": "Queued Connections", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(haproxy_backend_current_queue)", + 
"unitOverride": "" + } + } + } + }, + { + "xPos": 32, + "width": 8, + "height": 8, + "widget": { + "title": "Average Response Time Seconds", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "avg(haproxy_backend_response_time_average_seconds)", + "unitOverride": "" + } + } + } + }, + { + "xPos": 24, + "width": 8, + "height": 8, + "widget": { + "title": "Backend 2XX %", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "(sum(rate(haproxy_backend_http_responses_total{${Cluster},${Location},${Namespace},code=\"2xx\"}[1m])) / sum(rate(haproxy_backend_http_responses_total{${Cluster},${Location},${Namespace}}[1m]))) * 100", + "unitOverride": "" + } + } + } + }, + { + "xPos": 12, + "width": 12, + "height": 8, + "widget": { + "title": "Frontend 5XX %", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "(sum(rate(haproxy_frontend_http_responses_total{${Cluster},${Location},${Namespace},code=\"5xx\"}[1m])) / sum(rate(haproxy_frontend_http_responses_total{${Cluster},${Location},${Namespace}}[1m]))) * 100", + "unitOverride": "" + } + } + } } ] - } + }, + "labels": {} } diff --git a/dashboards/haproxy/metadata.yaml b/dashboards/haproxy/metadata.yaml index ca03c514ba..53d2eca924 100644 --- a/dashboards/haproxy/metadata.yaml +++ b/dashboards/haproxy/metadata.yaml @@ -4,7 +4,7 @@ sample_dashboards: id: haproxy-prometheus display_name: HAProxy Prometheus Overview description: |- - This dashboard has charts displaying: Frontend HTTP Reponses, Backend HTTP Responses, Frontend HTTP Bytes In, Frontend HTTP Bytes Out, Frontend Connections, Backend HTTP Bytes In, Backend HTTP Bytes Out, Backend Connection Errors Rate, Frontend Action Sessions, Backend Active Sessions, Frontend HTTP Request Rate, Backend HTTP Response Rate, Frontend Error Rate, Backend 
Response Errors Rate, Frontend Denied Request Rate, and Backend Retry Warnings Rate + This dashboard has charts displaying: Frontend HTTP Responses, Backend HTTP Responses, Frontend HTTP Bytes In, Frontend HTTP Bytes Out, Frontend Connections, Backend HTTP Bytes In, Backend HTTP Bytes Out, Backend Connection Errors Rate, Frontend Active Sessions, Backend Active Sessions, Frontend HTTP Request Rate, Backend HTTP Response Rate, Frontend Error Rate, Backend Response Errors Rate, Frontend Denied Request Rate, Average Response Time Seconds, Queued Connections, and Backend Retry Warnings Rate related_integrations: - id: haproxy platform: GKE diff --git a/dashboards/kafka/kafka-prometheus.01.png b/dashboards/kafka/kafka-prometheus.01.png deleted file mode 100644 index b66a90d04b..0000000000 Binary files a/dashboards/kafka/kafka-prometheus.01.png and /dev/null differ diff --git a/dashboards/kafka/kafka-prometheus.02.png b/dashboards/kafka/kafka-prometheus.02.png deleted file mode 100644 index 8d5a0acdde..0000000000 Binary files a/dashboards/kafka/kafka-prometheus.02.png and /dev/null differ diff --git a/dashboards/kafka/kafka-prometheus.json b/dashboards/kafka/kafka-prometheus.json index 43a2739ff1..11c1f3109a 100644 --- a/dashboards/kafka/kafka-prometheus.json +++ b/dashboards/kafka/kafka-prometheus.json @@ -1,5 +1,5 @@ { - "category": "CUSTOM", + "displayName": "Kafka Prometheus Overview", "dashboardFilters": [ { "filterType": "RESOURCE_LABEL", @@ -17,67 +17,61 @@ "templateVariable": "Namespace" } ], - "displayName": "Kafka Prometheus Overview", - "labels": {}, "mosaicLayout": { - "columns": 12, + "columns": 48, "tiles": [ { - "height": 4, + "yPos": 12, + "width": 24, + "height": 8, "widget": { - "title": "Topic Partitions", + "title": "Partitions", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery":
"kafka_topic_partitions{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "kafka_topic_partitions{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], - "timeshiftDuration": "0s", + "thresholds": [], "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 12, - "xPos": 0, - "yPos": 0 + } }, { - "height": 4, + "width": 16, + "height": 8, "widget": { "title": "Brokers", - "xyChart": { - "chartOptions": { - "mode": "COLOR" - }, - "dataSets": [ - { - "plotType": "STACKED_AREA", - "targetAxis": "Y1", - "timeSeriesQuery": { - "prometheusQuery": "kafka_brokers{${Cluster},${Location},${Namespace}}" - } - } - ], - "timeshiftDuration": "0s", - "yAxis": { - "scale": "LINEAR" + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "kafka_brokers{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } - }, - "width": 6, - "xPos": 0, - "yPos": 4 + } }, { - "height": 4, + "xPos": 24, + "yPos": 12, + "width": 24, + "height": 8, "widget": { "title": "Leader", "xyChart": { @@ -86,52 +80,63 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "kafka_topic_partition_leader{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "kafka_topic_partition_leader{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 4 + } }, { - "height": 4, + "yPos": 20, + "width": 24, + "height": 8, "widget": { - "title": "Under Replicated Partition", + "title": "Under Replicated Partitions", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": 
"kafka_topic_partition_under_replicated_partition{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "kafka_topic_partition_under_replicated_partition{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 8 + } }, { - "height": 4, + "xPos": 24, + "yPos": 20, + "width": 24, + "height": 8, "widget": { "title": "Leader Is Preferred", "xyChart": { @@ -140,25 +145,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "kafka_topic_partition_leader_is_preferred{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "kafka_topic_partition_leader_is_preferred{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 8 + } }, { - "height": 4, + "yPos": 28, + "width": 24, + "height": 8, "widget": { "title": "Replicas", "xyChart": { @@ -167,52 +177,63 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "kafka_topic_partition_replicas{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "kafka_topic_partition_replicas{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 12 + } }, { - "height": 4, + "xPos": 24, + "yPos": 28, + "width": 24, + "height": 8, "widget": { - "title": "In Sync Replica", + "title": "In Sync Replicas", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": 
{ - "prometheusQuery": "kafka_topic_partition_in_sync_replica{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "kafka_topic_partition_in_sync_replica{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 12 + } }, { - "height": 4, + "yPos": 36, + "width": 24, + "height": 8, "widget": { "title": "Current Offset", "xyChart": { @@ -221,25 +242,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "kafka_topic_partition_current_offset{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "kafka_topic_partition_current_offset{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 16 + } }, { - "height": 4, + "xPos": 24, + "yPos": 36, + "width": 24, + "height": 8, "widget": { "title": "Oldest Offset", "xyChart": { @@ -248,23 +275,82 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "STACKED_AREA", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "kafka_topic_partition_oldest_offset{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "kafka_topic_partition_oldest_offset{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 16 + } + }, + { + "yPos": 8, + "width": 48, + "height": 4, + "widget": { + "title": "Topics", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": 
"POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "xPos": 16, + "width": 16, + "height": 8, + "widget": { + "title": "Under Replicated Partitions", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(kafka_topic_partition_under_replicated_partition{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } + }, + { + "xPos": 32, + "width": 16, + "height": 8, + "widget": { + "title": "In Sync Replicas", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "sum(kafka_topic_partition_in_sync_replica{${Cluster},${Location},${Namespace}})", + "unitOverride": "" + } + } + } } ] - } + }, + "labels": {} } diff --git a/dashboards/kafka/kafka-prometheus.png b/dashboards/kafka/kafka-prometheus.png new file mode 100644 index 0000000000..cad098057e Binary files /dev/null and b/dashboards/kafka/kafka-prometheus.png differ diff --git a/dashboards/redis/README.md b/dashboards/redis/README.md index 8e96538713..49be20ab94 100644 --- a/dashboards/redis/README.md +++ b/dashboards/redis/README.md @@ -28,4 +28,4 @@ |Redis Prometheus| |:-----------------------| |Filename: [redis-prometheus.json](redis-prometheus.json)| -|This dashboard is based on prometheus metrics coming from an [exporter](https://github.com/oliver006/redis_exporter). This dashboard has charts highlighting `Commands Per Second`, `Connections`, `Memory Used`, `Keys`, and `Network I/O`| +|This dashboard is based on prometheus metrics coming from an [exporter](https://github.com/oliver006/redis_exporter). 
This dashboard has charts highlighting `Commands Per Second`, `Connections`, `Memory Used`, `Keys`, `Network I/O`, `Latency Percentiles (Milliseconds)`, `Hit Percentage`, `Blocked Clients`, `Slowlog Length`, `Evictions`, `Client Recent Max Input Buffer`, and `Fragmentation Ratio`| diff --git a/dashboards/redis/metadata.yaml b/dashboards/redis/metadata.yaml index 77db0da9d5..fa0b4b238e 100644 --- a/dashboards/redis/metadata.yaml +++ b/dashboards/redis/metadata.yaml @@ -24,7 +24,7 @@ sample_dashboards: id: redis-prometheus display_name: Redis Prometheus Overview description: |- - This dashboard has charts highlighting Commands Per Second, Connections, Memory Used, Keys, and Network I/O + This dashboard has charts highlighting Commands Per Second, Connections, Memory Used, Keys, Network I/O, Evictions, Latency Percentiles (Milliseconds), Hit Percentage, Blocked Clients, Client Recent Max Input Buffer, Slowlog Length, and Fragmentation Ratio related_integrations: - id: redis platform: GKE diff --git a/dashboards/redis/redis-prometheus.json b/dashboards/redis/redis-prometheus.json index 81d118dc6d..dbf61b7a80 100644 --- a/dashboards/redis/redis-prometheus.json +++ b/dashboards/redis/redis-prometheus.json @@ -1,5 +1,4 @@ { - "category": "CUSTOM", "dashboardFilters": [ { "filterType": "RESOURCE_LABEL", @@ -20,10 +19,10 @@ "displayName": "Redis Prometheus Overview", "labels": {}, "mosaicLayout": { - "columns": 12, + "columns": 48, "tiles": [ { - "height": 4, + "height": 12, "widget": { "title": "Network I/O - Input", "xyChart": { @@ -32,26 +31,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "rate(redis_net_input_bytes_total{${Cluster}, ${Location}, ${Namespace}} [5m])" + "prometheusQuery": "rate(redis_net_input_bytes_total{${Cluster}, ${Location}, ${Namespace}} [5m])", + "unitOverride": "" } } ], "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + 
"label": "", "scale": "LINEAR" } } }, - "width": 6, - "xPos": 0, - "yPos": 8 + "width": 24, + "xPos": 24, + "yPos": 40 }, { - "height": 4, + "height": 8, "widget": { "title": "Keys", "xyChart": { @@ -60,26 +64,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "redis_db_keys{${Cluster}, ${Location}, ${Namespace}}" + "prometheusQuery": "redis_db_keys{${Cluster}, ${Location}, ${Namespace}}", + "unitOverride": "" } } ], "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } }, - "width": 6, - "xPos": 6, - "yPos": 4 + "width": 24, + "yPos": 56 }, { - "height": 4, + "height": 8, "widget": { "title": "Commands Per Second", "xyChart": { @@ -88,26 +96,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "sum(rate(redis_commands_total{${Cluster}, ${Location}, ${Namespace}} [1m])) by (cmd)" + "prometheusQuery": "sum(rate(redis_commands_total{${Cluster}, ${Location}, ${Namespace}} [1m])) by (cmd)", + "unitOverride": "" } } ], "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } }, - "width": 6, - "xPos": 0, - "yPos": 0 + "width": 24, + "yPos": 20 }, { - "height": 4, + "height": 8, "widget": { "title": "Connections", "xyChart": { @@ -116,26 +128,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "redis_connected_clients{${Cluster}, ${Location}, ${Namespace}}" + "prometheusQuery": "redis_connected_clients{${Cluster}, ${Location}, ${Namespace}}", + "unitOverride": "" } } ], "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } }, - "width": 6, - "xPos": 0, - "yPos": 4 + "width": 24, + "yPos": 48 }, { - "height": 4, + "height": 8, 
"widget": { "title": "Memory Used", "xyChart": { @@ -144,26 +160,31 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "redis_memory_used_bytes{${Cluster}, ${Location}, ${Namespace}}" + "prometheusQuery": "redis_memory_used_bytes{${Cluster}, ${Location}, ${Namespace}}", + "unitOverride": "" } } ], "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } }, - "width": 6, - "xPos": 6, - "yPos": 0 + "width": 24, + "xPos": 24, + "yPos": 4 }, { - "height": 4, + "height": 12, "widget": { "title": "Network I/O - Output", "xyChart": { @@ -172,23 +193,329 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "rate(redis_net_output_bytes_total{${Cluster}, ${Location}, ${Namespace}} [5m])" + "prometheusQuery": "rate(redis_net_output_bytes_total{${Cluster}, ${Location}, ${Namespace}} [5m])", + "unitOverride": "" } } ], "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + }, + "width": 24, + "xPos": 24, + "yPos": 52 + }, + { + "height": 8, + "widget": { + "title": "Blocked Clients", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "redis_blocked_clients{${Cluster}, ${Location}, ${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": { + "label": "", "scale": "LINEAR" } } }, - "width": 6, - "xPos": 6, - "yPos": 8 + "width": 24, + "xPos": 24, + "yPos": 12 + }, + { + "height": 8, + "widget": { + "title": "Hit Percentage", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + 
"targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "redis_keyspace_hits_total{${Cluster}, ${Location}, ${Namespace}} / (redis_keyspace_hits_total{${Cluster}, ${Location}, ${Namespace}} + redis_keyspace_misses_total{${Cluster}, ${Location}, ${Namespace}})", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + }, + "width": 24, + "yPos": 12 + }, + { + "height": 8, + "widget": { + "title": "Slowlog Length", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "redis_slowlog_length{${Cluster}, ${Location}, ${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + }, + "width": 24, + "yPos": 28 + }, + { + "height": 8, + "widget": { + "title": "Latency Percentiles (Milliseconds)", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "redis_latency_percentiles_usec{${Cluster}, ${Location}, ${Namespace}} * 0.001", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + }, + "width": 24, + "yPos": 4 + }, + { + "height": 8, + "widget": { + "title": "Evictions", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "redis_evicted_keys_total{${Cluster}, ${Location}, ${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + }, + "width": 24, + "yPos": 40 + }, + { + "height": 8, + "widget": { + "title": "Fragmentation Ratio", + "xyChart": { + "chartOptions": { + "mode": 
"COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "redis_mem_fragmentation_ratio{${Cluster}, ${Location}, ${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + }, + "width": 24, + "xPos": 24, + "yPos": 28 + }, + { + "height": 8, + "widget": { + "title": "Client Recent Max Input Buffer", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "redis_client_recent_max_input_buffer_bytes{${Cluster}, ${Location}, ${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + }, + "width": 24, + "xPos": 24, + "yPos": 20 + }, + { + "height": 4, + "widget": { + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + }, + "title": "Performance" + }, + "width": 24 + }, + { + "height": 4, + "widget": { + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + }, + "title": "Memory" + }, + "width": 24, + "xPos": 24 + }, + { + "height": 4, + "widget": { + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": 
"POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + }, + "title": "Database Activity" + }, + "width": 24, + "yPos": 36 + }, + { + "height": 4, + "widget": { + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#FFFFFF", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + }, + "title": "Network" + }, + "width": 24, + "xPos": 24, + "yPos": 36 } ] } diff --git a/dashboards/redis/redis-prometheus.png b/dashboards/redis/redis-prometheus.png index 43bc20b499..74744c0fff 100644 Binary files a/dashboards/redis/redis-prometheus.png and b/dashboards/redis/redis-prometheus.png differ diff --git a/dashboards/varnish/README.md b/dashboards/varnish/README.md index 908119e1ca..43760705da 100644 --- a/dashboards/varnish/README.md +++ b/dashboards/varnish/README.md @@ -13,4 +13,4 @@ |Varnish Prometheus Overview| |:------------------| |Filename: [varnish-prometheus-overview.json](varnish-prometheus-overview.json)| -|This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/jonnenauha/prometheus_varnish_exporter), including `Backend Connections`, `Session Connections`, `Requests`, `Request Errors`, `Threads`, `Cache Hits / Misses`, `Expired Objects`, and `Object Structs Created`.| +|This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/jonnenauha/prometheus_varnish_exporter), including `Service Status`, `Uptime`, `Backend Up`, `Happy Backend Health Probes`, `Backend Connections`, `Reused and Recycled Backend Connections`, `Session Connections`, `Dropped Session Connections`, `Backend Requests`, `Session Requests`, `Session Request Errors`, `Active Threads`, `Threads Created`, `Object Structs Created`, `Expired Objects`, `Cache Hits`, and `Cache Misses`.| diff --git 
a/dashboards/varnish/metadata.yaml b/dashboards/varnish/metadata.yaml index aa0988507b..dc7093ed6e 100644 --- a/dashboards/varnish/metadata.yaml +++ b/dashboards/varnish/metadata.yaml @@ -11,7 +11,7 @@ sample_dashboards: category: Varnish id: varnish-prometheus-overview display_name: Varnish Prometheus Overview - description: "This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/jonnenauha/prometheus_varnish_exporter), including Backend Connections, Session Connections, Requests, Request Errors, Threads, Cache Hits / Misses, Expired Objects, and Object Structs Created." + description: "This dashboard is based on prometheus metrics exposed by an [exporter](https://github.com/jonnenauha/prometheus_varnish_exporter), including Service Status, Uptime, Backend Up, Happy Backend Health Probes, Backend Connections, Reused and Recycled Backend Connections, Session Connections, Dropped Session Connections, Backend Requests, Session Requests, Session Request Errors, Active Threads, Threads Created, Object Structs Created, Expired Objects, Cache Hits, and Cache Misses." 
related_integrations: - id: varnish platform: GKE diff --git a/dashboards/varnish/varnish-prometheus-overview.01.png b/dashboards/varnish/varnish-prometheus-overview.01.png index f34decd43c..ec554746fe 100644 Binary files a/dashboards/varnish/varnish-prometheus-overview.01.png and b/dashboards/varnish/varnish-prometheus-overview.01.png differ diff --git a/dashboards/varnish/varnish-prometheus-overview.02.png b/dashboards/varnish/varnish-prometheus-overview.02.png index b4a9865668..ed9592c0b4 100644 Binary files a/dashboards/varnish/varnish-prometheus-overview.02.png and b/dashboards/varnish/varnish-prometheus-overview.02.png differ diff --git a/dashboards/varnish/varnish-prometheus-overview.json b/dashboards/varnish/varnish-prometheus-overview.json index 14dab9435a..629b3ff99d 100644 --- a/dashboards/varnish/varnish-prometheus-overview.json +++ b/dashboards/varnish/varnish-prometheus-overview.json @@ -1,5 +1,5 @@ -{ - "category": "CUSTOM", +{ + "displayName": "Varnish Prometheus Overview", "dashboardFilters": [ { "filterType": "RESOURCE_LABEL", @@ -17,13 +17,13 @@ "templateVariable": "Namespace" } ], - "displayName": "Varnish Prometheus Overview", - "labels": {}, "mosaicLayout": { - "columns": 12, + "columns": 48, "tiles": [ { - "height": 4, + "yPos": 12, + "width": 24, + "height": 16, "widget": { "title": "Backend Connections", "xyChart": { @@ -32,25 +32,30 @@ }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "varnish_backend_conn{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "varnish_backend_conn{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 0 + } }, { - "height": 4, + "yPos": 28, + "width": 24, + "height": 16, "widget": { "title": "Session Connections", "xyChart": { @@ -59,192 +64,521 @@ 
}, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "rate(varnish_main_sessions{type=\"conn\",${Cluster},${Location},${Namespace}}[5m])" + "prometheusQuery": "rate(varnish_main_sessions{type=\"conn\",${Cluster},${Location},${Namespace}}[5m])", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 0 + } }, { - "height": 4, + "xPos": 16, + "yPos": 44, + "width": 16, + "height": 16, "widget": { - "title": "Requests", + "title": "Session Requests", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "rate(varnish_main_client_req{${Cluster},${Location},${Namespace}}[5m])" + "prometheusQuery": "rate(varnish_main_client_req{${Cluster},${Location},${Namespace}}[5m])", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 4 + } }, { - "height": 4, + "xPos": 32, + "yPos": 44, + "width": 16, + "height": 16, "widget": { - "title": "Request Errors", + "title": "Session Request Errors", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "rate(varnish_main_client_req_400{${Cluster},${Location},${Namespace}}[5m])", + "unitOverride": "" + } + }, + { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "varnish_main_client_req_400{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "rate(varnish_main_client_resp_500{${Cluster},${Location},${Namespace}}[5m])", 
+ "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 6, - "yPos": 4 + } }, { - "height": 4, + "yPos": 64, + "width": 24, + "height": 16, "widget": { - "title": "Threads", + "title": "Active Threads", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "varnish_main_threads{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "varnish_main_threads{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 8 + } }, { - "height": 4, + "yPos": 96, + "width": 24, + "height": 16, "widget": { - "title": "Cache Hits / Misses", + "title": "Cache Hits", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "rate(varnish_main_cache_hit{${Cluster},${Location},${Namespace}}[5m])" + "prometheusQuery": "rate(varnish_main_cache_hit{${Cluster},${Location},${Namespace}}[5m])", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 24, + "yPos": 80, + "width": 24, + "height": 16, + "widget": { + "title": "Expired Objects", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "varnish_main_n_expired{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" 
+ } + } + } + }, + { + "yPos": 80, + "width": 24, + "height": 16, + "widget": { + "title": "Object Structs Created", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "varnish_main_n_object{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 24, + "yPos": 64, + "width": 24, + "height": 16, + "widget": { + "title": "Threads Created", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "varnish_main_threads_created{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "xPos": 24, + "yPos": 12, + "width": 24, + "height": 16, + "widget": { + "title": "Reused and Recycled Backend Connections", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "varnish_main_backend_reuse{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } }, { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "rate(varnish_main_cache_miss{${Cluster},${Location},${Namespace}}[5m])" + "prometheusQuery": "varnish_main_backend_recycle{${Cluster},${Location},${Namespace}}", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - 
"xPos": 6, - "yPos": 8 + } }, { - "height": 4, + "xPos": 24, + "yPos": 28, + "width": 24, + "height": 16, "widget": { - "title": "Expired Objects", + "title": "Dropped Session Connections", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "varnish_main_n_expired{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "rate(varnish_main_sessions{type=\"dropped\",${Cluster},${Location},${Namespace}}[5m])", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - "xPos": 0, - "yPos": 12 + } }, { + "yPos": 8, + "width": 48, "height": 4, "widget": { - "title": "Object Structs Created", + "title": "Networking", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#E7EFFE", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "yPos": 60, + "width": 48, + "height": 4, + "widget": { + "title": "Resource Utilization", + "text": { + "content": "", + "format": "MARKDOWN", + "style": { + "backgroundColor": "#E7EFFE", + "fontSize": "FS_LARGE", + "horizontalAlignment": "H_LEFT", + "padding": "P_EXTRA_SMALL", + "pointerLocation": "POINTER_LOCATION_UNSPECIFIED", + "textColor": "#212121", + "verticalAlignment": "V_TOP" + } + } + } + }, + { + "xPos": 24, + "yPos": 96, + "width": 24, + "height": 16, + "widget": { + "title": "Cache Misses", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "breakdowns": [], + "dimensions": [], + "measures": [], + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "rate(varnish_main_cache_miss{${Cluster},${Location},${Namespace}}[5m])", + 
"unitOverride": "" + } + } + ], + "thresholds": [], + "timeshiftDuration": "0s", + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "width": 12, + "height": 8, + "widget": { + "title": "Service Status", + "scorecard": { + "thresholds": [ + { + "color": "RED", + "direction": "BELOW", + "label": "", + "value": 1 + } + ], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "varnish_up{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + } + }, + { + "xPos": 12, + "width": 12, + "height": 8, + "widget": { + "title": "Uptime", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "prometheusQuery": "varnish_main_uptime{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + } + }, + { + "xPos": 36, + "width": 12, + "height": 8, + "widget": { + "title": "Happy Backend Health Probes", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "prometheusQuery": "varnish_backend_happy{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + } + }, + { + "xPos": 24, + "width": 12, + "height": 8, + "widget": { + "title": "Backend Up", + "scorecard": { + "blankView": {}, + "thresholds": [], + "timeSeriesQuery": { + "outputFullDuration": true, + "prometheusQuery": "varnish_backend_up{${Cluster},${Location},${Namespace}}", + "unitOverride": "" + } + } + } + }, + { + "yPos": 44, + "width": 16, + "height": 16, + "widget": { + "title": "Backend Requests", "xyChart": { "chartOptions": { "mode": "COLOR" }, "dataSets": [ { + "breakdowns": [], + "dimensions": [], + "measures": [], "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "varnish_main_n_object{${Cluster},${Location},${Namespace}}" + "prometheusQuery": "rate(varnish_backend_req{${Cluster},${Location},${Namespace}}[5m])", + "unitOverride": "" } } ], + "thresholds": [], "timeshiftDuration": "0s", "yAxis": { + "label": "", "scale": "LINEAR" } } - }, - "width": 6, - 
"xPos": 6, - "yPos": 12 + } } ] - } + }, + "labels": {} } diff --git a/integrations/couchdb/prometheus_metadata.yaml b/integrations/couchdb/prometheus_metadata.yaml index 6739489ec6..40d4a0baef 100644 --- a/integrations/couchdb/prometheus_metadata.yaml +++ b/integrations/couchdb/prometheus_metadata.yaml @@ -49,4 +49,16 @@ platforms: prometheus_name: couchdb_httpd_open_databases kind: GAUGE value_type: DOUBLE + - name: prometheus.googleapis.com/couchdb_httpd_auth_cache_hits/gauge + prometheus_name: couchdb_httpd_auth_cache_hits + kind: GAUGE + value_type: DOUBLE + - name: prometheus.googleapis.com/couchdb_httpd_auth_cache_misses/gauge + prometheus_name: couchdb_httpd_auth_cache_misses + kind: GAUGE + value_type: DOUBLE + - name: prometheus.googleapis.com/couchdb_httpd_up/gauge + prometheus_name: couchdb_httpd_up + kind: GAUGE + value_type: DOUBLE install_documentation_url: https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/couchdb diff --git a/integrations/hadoop/prometheus_metadata.yaml b/integrations/hadoop/prometheus_metadata.yaml index 069029c518..9db6dd20f8 100644 --- a/integrations/hadoop/prometheus_metadata.yaml +++ b/integrations/hadoop/prometheus_metadata.yaml @@ -13,6 +13,14 @@ platforms: prometheus_name: hadoop_namenode_numlivedatanodes kind: CUMULATIVE value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_numdeaddatanodes/unknown:counter + prometheus_name: hadoop_namenode_numdeaddatanodes + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_numstaledatanodes/unknown:counter + prometheus_name: hadoop_namenode_numstaledatanodes + kind: CUMULATIVE + value_type: DOUBLE - name: prometheus.googleapis.com/hadoop_namenode_volumefailurestotal/unknown:counter prometheus_name: hadoop_namenode_volumefailurestotal kind: CUMULATIVE @@ -29,10 +37,22 @@ platforms: prometheus_name: hadoop_namenode_totalblocks kind: CUMULATIVE value_type: DOUBLE + - name: 
prometheus.googleapis.com/hadoop_namenode_blockcapacity/unknown:counter + prometheus_name: hadoop_namenode_blockcapacity + kind: CUMULATIVE + value_type: DOUBLE - name: prometheus.googleapis.com/hadoop_namenode_corruptblocks/unknown:counter prometheus_name: hadoop_namenode_corruptblocks kind: CUMULATIVE value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_underreplicatedblocks/unknown:counter + prometheus_name: hadoop_namenode_underreplicatedblocks + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_excessblocks/unknown:counter + prometheus_name: hadoop_namenode_excessblocks + kind: CUMULATIVE + value_type: DOUBLE - name: prometheus.googleapis.com/hadoop_namenode_filestotal/unknown:counter prometheus_name: hadoop_namenode_filestotal kind: CUMULATIVE @@ -45,4 +65,44 @@ platforms: prometheus_name: hadoop_namenode_missingblocks kind: CUMULATIVE value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_numopenconnections/unknown:counter + prometheus_name: hadoop_namenode_numopenconnections + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_numdroppedconnections/unknown:counter + prometheus_name: hadoop_namenode_numdroppedconnections + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_numactiveclients/unknown:counter + prometheus_name: hadoop_namenode_numactiveclients + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_numactivesources/unknown:counter + prometheus_name: hadoop_namenode_numactivesources + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_numactivesinks/unknown:counter + prometheus_name: hadoop_namenode_numactivesinks + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_percentblockpoolused/unknown:counter + prometheus_name: hadoop_namenode_percentblockpoolused + kind: CUMULATIVE + 
value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_threads/unknown:counter + prometheus_name: hadoop_namenode_threads + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_memheapcommittedm/unknown:counter + prometheus_name: hadoop_namenode_memheapcommittedm + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_logerror/unknown:counter + prometheus_name: hadoop_namenode_logerror + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/hadoop_namenode_logfatal/unknown:counter + prometheus_name: hadoop_namenode_logfatal + kind: CUMULATIVE + value_type: DOUBLE install_documentation_url: https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/hadoop diff --git a/integrations/haproxy/prometheus_metadata.yaml b/integrations/haproxy/prometheus_metadata.yaml index db8e78c6ea..1eeac11f2b 100644 --- a/integrations/haproxy/prometheus_metadata.yaml +++ b/integrations/haproxy/prometheus_metadata.yaml @@ -49,6 +49,14 @@ platforms: prometheus_name: haproxy_backend_current_sessions kind: GAUGE value_type: DOUBLE + - name: prometheus.googleapis.com/haproxy_backend_response_time_average_seconds/gauge + prometheus_name: haproxy_backend_response_time_average_seconds + kind: GAUGE + value_type: DOUBLE + - name: prometheus.googleapis.com/haproxy_backend_current_queue/gauge + prometheus_name: haproxy_backend_current_queue + kind: GAUGE + value_type: DOUBLE - name: prometheus.googleapis.com/haproxy_frontend_http_requests_total/counter prometheus_name: haproxy_frontend_http_requests_total kind: CUMULATIVE diff --git a/integrations/redis/prometheus_metadata.yaml b/integrations/redis/prometheus_metadata.yaml index 271c9b4b28..13548d338b 100644 --- a/integrations/redis/prometheus_metadata.yaml +++ b/integrations/redis/prometheus_metadata.yaml @@ -33,4 +33,36 @@ platforms: prometheus_name: redis_net_input_bytes_total kind: CUMULATIVE value_type: DOUBLE + - name: 
prometheus.googleapis.com/redis_latency_percentiles_usec/summary:counter + prometheus_name: redis_latency_percentiles_usec + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/redis_keyspace_hits_total/counter + prometheus_name: redis_keyspace_hits_total + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/redis_keyspace_misses_total/counter + prometheus_name: redis_keyspace_misses_total + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/redis_blocked_clients/gauge + prometheus_name: redis_blocked_clients + kind: GAUGE + value_type: DOUBLE + - name: prometheus.googleapis.com/redis_client_recent_max_input_buffer_bytes/gauge + prometheus_name: redis_client_recent_max_input_buffer_bytes + kind: GAUGE + value_type: DOUBLE + - name: prometheus.googleapis.com/redis_mem_fragmentation_ratio/gauge + prometheus_name: redis_mem_fragmentation_ratio + kind: GAUGE + value_type: DOUBLE + - name: prometheus.googleapis.com/redis_slowlog_length/gauge + prometheus_name: redis_slowlog_length + kind: GAUGE + value_type: DOUBLE + - name: prometheus.googleapis.com/redis_evicted_keys_total/counter + prometheus_name: redis_evicted_keys_total + kind: CUMULATIVE + value_type: DOUBLE install_documentation_url: https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/redis diff --git a/integrations/varnish/prometheus_metadata.yaml b/integrations/varnish/prometheus_metadata.yaml index 2db1ea17e2..74e5b37fe7 100644 --- a/integrations/varnish/prometheus_metadata.yaml +++ b/integrations/varnish/prometheus_metadata.yaml @@ -29,6 +29,10 @@ platforms: prometheus_name: varnish_main_client_req_400 kind: CUMULATIVE value_type: DOUBLE + - name: prometheus.googleapis.com/varnish_main_client_resp_500/counter + prometheus_name: varnish_main_client_resp_500 + kind: CUMULATIVE + value_type: DOUBLE - name: prometheus.googleapis.com/varnish_main_threads/gauge prometheus_name: varnish_main_threads kind: GAUGE @@ -37,6 +41,10 @@ 
platforms: prometheus_name: varnish_main_threads_failed kind: CUMULATIVE value_type: DOUBLE + - name: prometheus.googleapis.com/varnish_main_threads_created/counter + prometheus_name: varnish_main_threads_created + kind: CUMULATIVE + value_type: DOUBLE - name: prometheus.googleapis.com/varnish_main_cache_hit/counter prometheus_name: varnish_main_cache_hit kind: CUMULATIVE @@ -57,4 +65,28 @@ platforms: prometheus_name: varnish_main_n_obj_purged kind: CUMULATIVE value_type: DOUBLE + - name: prometheus.googleapis.com/varnish_up/gauge + prometheus_name: varnish_up + kind: GAUGE + value_type: DOUBLE + - name: prometheus.googleapis.com/varnish_main_uptime/counter + prometheus_name: varnish_main_uptime + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/varnish_backend_up/gauge + prometheus_name: varnish_backend_up + kind: GAUGE + value_type: DOUBLE + - name: prometheus.googleapis.com/varnish_backend_happy/gauge + prometheus_name: varnish_backend_happy + kind: GAUGE + value_type: DOUBLE + - name: prometheus.googleapis.com/varnish_main_backend_reuse/counter + prometheus_name: varnish_main_backend_reuse + kind: CUMULATIVE + value_type: DOUBLE + - name: prometheus.googleapis.com/varnish_backend_req/counter + prometheus_name: varnish_backend_req + kind: CUMULATIVE + value_type: DOUBLE install_documentation_url: https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/varnish