diff --git a/operators/multiclusterobservability/manifests/base/config/metrics_allowlist.yaml b/operators/multiclusterobservability/manifests/base/config/metrics_allowlist.yaml index 6724b1a785..8c0e64af31 100644 --- a/operators/multiclusterobservability/manifests/base/config/metrics_allowlist.yaml +++ b/operators/multiclusterobservability/manifests/base/config/metrics_allowlist.yaml @@ -143,6 +143,13 @@ data: - cnv:vmi_status_running:count - kubevirt_hyperconverged_operator_health_status - kubevirt_hco_system_health_status + - kubevirt_vmi_info + - kubevirt_vm_running_status_last_transition_timestamp_seconds + - kubevirt_vm_non_running_status_last_transition_timestamp_seconds + - kubevirt_vm_error_status_last_transition_timestamp_seconds + - kubevirt_vm_starting_status_last_transition_timestamp_seconds + - kubevirt_vm_migrating_status_last_transition_timestamp_seconds + - kubevirt_vmi_memory_available_bytes matches: - __name__="workqueue_queue_duration_seconds_bucket",job="apiserver" - __name__="workqueue_adds_total",job="apiserver" diff --git a/operators/multiclusterobservability/manifests/base/grafana/dash-acm-virtual-machines-health.yaml b/operators/multiclusterobservability/manifests/base/grafana/dash-acm-virtual-machines-health.yaml new file mode 100644 index 0000000000..e77415a4df --- /dev/null +++ b/operators/multiclusterobservability/manifests/base/grafana/dash-acm-virtual-machines-health.yaml @@ -0,0 +1,417 @@ +apiVersion: v1 +data: + acm-virtual-machines-health.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "This dashboard provides a quick overview of the health status of Virtual Machines (VMs) across clusters in the KubeVirt 
environment. It helps users identify VMs that are currently in unhealthy states and those that have been in such states for an extended period, potentially making them candidates for cleanup. Use the filters to customize the view based on cluster, namespace, VM name, and duration in an unhealthy state for efficient monitoring and management.", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 41, + "iteration": 1725979970120, + "links": [], + "panels": [ + { + "datasource": null, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cluster" + }, + "properties": [ + { + "id": "custom.filterable", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "namespace" + }, + "properties": [ + { + "id": "custom.filterable", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "custom.filterable", + "value": true + }, + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "name" + }, + "properties": [ + { + "id": "custom.filterable", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "status" + }, + "properties": [ + { + "id": "custom.filterable", + "value": true + } + ] + } + ] + }, + "gridPos": { + "h": 16, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 5, + "options": { + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Value" + } + ] + }, + "pluginVersion": "8.5.20", + "targets": [ + { + "exemplar": true, + "expr": "topk($top_results,\n(time() - 
(label_replace(kubevirt_vm_starting_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"}>0 ,\"status\",\"starting\",\"\",\"\"))> $days_in_status_gt*24*60*60) + on(cluster, name, namespace) group_left()(0*(sum by (cluster, namespace, name)($status>0))) or\n(time() - (label_replace(kubevirt_vm_non_running_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"}>0,\"status\",\"stopped\",\"\",\"\"))> $days_in_status_gt*24*60*60) + on(cluster, name, namespace) group_left()(0*(sum by (cluster, namespace, name)($status>0))) or\n(time() - (label_replace(kubevirt_vm_error_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"}>0,\"status\",\"error\",\"\",\"\"))> $days_in_status_gt*24*60*60) + on(cluster, name, namespace) group_left()(0*(sum by (cluster, namespace, name)($status>0))) or\n(time() - (label_replace(kubevirt_vm_migrating_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"}>0,\"status\",\"migrating\",\"\",\"\"))> $days_in_status_gt*24*60*60) + on(cluster, name, namespace) group_left()(0*(sum by (cluster, namespace, name)($status>0)))\n)", + "format": "table", + "hide": true, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "exemplar": true, + "expr": "topk($top_results,\n (\n (\n (time() - label_replace(kubevirt_vm_starting_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"} > 0, \"status\", \"starting\", \"\", \"\")) > $days_in_status_gt * 24 * 60 * 60\n ) and (\n (time() - label_replace(kubevirt_vm_starting_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"} > 0, \"status\", \"starting\", \"\", \"\")) < $days_in_status_lt * 24 * 60 * 60\n )\n ) + on(cluster, name, namespace) group_left() (0 * (sum by 
(cluster, namespace, name)($status > 0)))\n or\n (\n (\n (time() - label_replace(kubevirt_vm_non_running_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"} > 0, \"status\", \"stopped\", \"\", \"\")) > $days_in_status_gt * 24 * 60 * 60\n ) and (\n (time() - label_replace(kubevirt_vm_non_running_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"} > 0, \"status\", \"stopped\", \"\", \"\")) < $days_in_status_lt * 24 * 60 * 60\n )\n ) + on(cluster, name, namespace) group_left() (0 * (sum by (cluster, namespace, name)($status > 0)))\n or\n (\n (\n (time() - label_replace(kubevirt_vm_error_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"} > 0, \"status\", \"error\", \"\", \"\")) > $days_in_status_gt * 24 * 60 * 60\n ) and (\n (time() - label_replace(kubevirt_vm_error_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"} > 0, \"status\", \"error\", \"\", \"\")) < $days_in_status_lt * 24 * 60 * 60\n )\n ) + on(cluster, name, namespace) group_left() (0 * (sum by (cluster, namespace, name)($status > 0)))\n or\n (\n (\n (time() - label_replace(kubevirt_vm_migrating_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"} > 0, \"status\", \"migrating\", \"\", \"\")) > $days_in_status_gt * 24 * 60 * 60\n ) and (\n (time() - label_replace(kubevirt_vm_migrating_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"} > 0, \"status\", \"migrating\", \"\", \"\")) < $days_in_status_lt * 24 * 60 * 60\n )\n ) + on(cluster, name, namespace) group_left() (0 * (sum by (cluster, namespace, name)($status > 0)))\n)\n", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + } + ], + "title": "Virtual Machines List 
by Time In Status", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "clusterID": true, + "container": true, + "endpoint": true, + "instance": true, + "job": true, + "pod": true, + "receive": true, + "service": true, + "tenant_id": true + }, + "indexByName": {}, + "renameByName": { + "Value": "Time in Status" + } + } + } + ], + "type": "table" + } + ], + "refresh": "", + "schemaVersion": 30, + "style": "dark", + "tags": [ + "Virtualization" + ], + "templating": { + "list": [ + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": null, + "definition": "label_values(kubevirt_vm_running_status_last_transition_timestamp_seconds, cluster)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Cluster", + "multi": true, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(kubevirt_vm_running_status_last_transition_timestamp_seconds, cluster)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": null, + "definition": "label_values(kubevirt_vmi_info, name)", + "description": "Filter the Virtual Machine by its name", + "error": null, + "hide": 0, + "includeAll": true, + "label": "VM Name", + "multi": true, + "name": "name", + "options": [], + "query": { + "query": "label_values(kubevirt_vmi_info, name)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": null, + "definition": "label_values(kubevirt_vmi_info, namespace)", + "description": "Filter the Virtual Machine by its 
Namespace", + "error": null, + "hide": 0, + "includeAll": true, + "label": "Namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(kubevirt_vmi_info, namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "stopped", + "value": "kubevirt_vm_non_running_status_last_transition_timestamp_seconds" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Status", + "multi": false, + "name": "status", + "options": [ + { + "selected": true, + "text": "stopped", + "value": "kubevirt_vm_non_running_status_last_transition_timestamp_seconds" + }, + { + "selected": false, + "text": "starting", + "value": "kubevirt_vm_starting_status_last_transition_timestamp_seconds" + }, + { + "selected": false, + "text": "migrating", + "value": "kubevirt_vm_migrating_status_last_transition_timestamp_seconds" + }, + { + "selected": false, + "text": "error", + "value": "kubevirt_vm_error_status_last_transition_timestamp_seconds" + } + ], + "query": "stopped : kubevirt_vm_non_running_status_last_transition_timestamp_seconds, starting : kubevirt_vm_starting_status_last_transition_timestamp_seconds, migrating : kubevirt_vm_migrating_status_last_transition_timestamp_seconds, error : kubevirt_vm_error_status_last_transition_timestamp_seconds", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "current": { + "selected": false, + "text": "90", + "value": "90" + }, + "description": "Filter the Virtual Machines that are in the specific status for more than the selected number of days", + "error": null, + "hide": 0, + "label": "Days in Status >", + "name": "days_in_status_gt", + "options": [ + { + "selected": true, + "text": "90", + "value": "90" + } + ], + "query": "90", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { 
+ "selected": false, + "text": "1000", + "value": "1000" + }, + "description": "Filter the Virtual Machines that are in the specific status for less than the selected number of days", + "error": null, + "hide": 0, + "label": "Days in Status <", + "name": "days_in_status_lt", + "options": [ + { + "selected": true, + "text": "1000", + "value": "1000" + } + ], + "query": "1000", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": true, + "text": "10", + "value": "10" + }, + "description": null, + "error": null, + "hide": 0, + "label": "Number of Results", + "name": "top_results", + "options": [ + { + "selected": true, + "text": "10", + "value": "10" + } + ], + "query": "10", + "skipUrlSync": false, + "type": "textbox" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Virtual Machines Health", + "uid": "lMD6V93Sz", + "version": 1 + } +kind: ConfigMap +metadata: + name: grafana-dashboard-acm-virtual-machines-health + namespace: open-cluster-management-observability + annotations: + observability.open-cluster-management.io/dashboard-folder: "ACM / OpenShift Virtualization" diff --git a/operators/multiclusterobservability/manifests/base/grafana/dash-acm-virtual-machines-inventory.yaml b/operators/multiclusterobservability/manifests/base/grafana/dash-acm-virtual-machines-inventory.yaml new file mode 100644 index 0000000000..0a2a4d5e52 --- /dev/null +++ b/operators/multiclusterobservability/manifests/base/grafana/dash-acm-virtual-machines-inventory.yaml @@ -0,0 +1,319 @@ +apiVersion: v1 +data: + acm-openshift-virtual-machines-inventory.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": 
"This dashboard shows details related to the OpenShift Virtualization operator and Virtual machines", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 39, + "iteration": 1725979425774, + "links": [], + "panels": [ + { + "datasource": null, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byType", + "options": "string" + }, + "properties": [ + { + "id": "custom.filterable", + "value": true + } + ] + } + ] + }, + "gridPos": { + "h": 26, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Node" + } + ] + }, + "pluginVersion": "8.5.20", + "targets": [ + { + "exemplar": true, + "expr": "sum by (cluster, namespace, name, status) (\n label_replace(kubevirt_vm_running_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"} > 0, \"status\", \"running\", \"__name__\", \".*\") \n or label_replace(kubevirt_vm_non_running_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"} > 0, \"status\", \"stopped\", \"__name__\", \".*\")\n or label_replace(kubevirt_vm_error_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"} > 0, \"status\", \"error\", \"__name__\", \".*\") \n or label_replace(kubevirt_vm_starting_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"} > 0, \"status\", \"starting\", \"__name__\", \".*\") \n or label_replace(kubevirt_vm_migrating_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", name=~\"$name\", 
namespace=~\"$namespace\"} > 0, \"status\", \"migrating\", \"__name__\", \".*\")\n)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "exemplar": true, + "expr": "kubevirt_vmi_info{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum by (cluster, namespace, name, node)(kubevirt_vmi_memory_available_bytes{cluster=~\"$cluster\", name=~\"$name\", namespace=~\"$namespace\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "C" + } + ], + "title": "Virtual Machines Inventory", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value #A": true, + "Value #B": true, + "Value #C": true, + "__name__": true, + "clusterID": true, + "container": true, + "endpoint": true, + "guest_os_kernel_release": true, + "guest_os_machine": true, + "instance": true, + "job": true, + "os": true, + "phase": true, + "pod": true, + "receive": true, + "service": true, + "tenant_id": true + }, + "indexByName": { + "Time": 0, + "Value #A": 22, + "Value #B": 21, + "Value #C": 28, + "__name__": 23, + "cluster": 1, + "clusterID": 24, + "container": 26, + "endpoint": 27, + "flavor": 11, + "guest_os_kernel_release": 15, + "guest_os_machine": 14, + "guest_os_name": 6, + "guest_os_version_id": 7, + "instance": 25, + "instance_type": 8, + "job": 16, + "name": 2, + "namespace": 3, + "node": 5, + "os": 13, + "phase": 12, + "pod": 17, + "preference": 9, + "receive": 18, + "service": 19, + "status": 4, + "tenant_id": 20, + "workload": 10 + }, + "renameByName": { + "Value #A": "", + "cluster": "Cluster", + "flavor": "Flavor", + "guest_os_name": "OS Name", + "guest_os_version_id": "OS Version", + "instance_type": 
"Instance Type", + "name": "VM Name", + "namespace": "Namespace", + "node": "Node", + "os": "", + "phase": "Phase", + "preference": "Preference", + "status": "Status", + "workload": "Workload" + } + } + } + ], + "type": "table" + } + ], + "refresh": "", + "schemaVersion": 30, + "style": "dark", + "tags": [ + "Virtualization" + ], + "templating": { + "list": [ + { + "allValue": "", + "current": { + "selected": false, + "text": "bleeding", + "value": "bleeding" + }, + "datasource": null, + "definition": "label_values(kubevirt_vm_running_status_last_transition_timestamp_seconds, cluster)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(kubevirt_vm_running_status_last_transition_timestamp_seconds, cluster)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": null, + "definition": "label_values(kubevirt_vm_running_status_last_transition_timestamp_seconds{cluster=~\"$cluster\"}, namespace)", + "description": "Filter the Virtual Machine by its Namespace", + "error": null, + "hide": 0, + "includeAll": true, + "label": "Namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(kubevirt_vm_running_status_last_transition_timestamp_seconds{cluster=~\"$cluster\"}, namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": null, + "definition": "label_values(kubevirt_vm_running_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", 
namespace=~\"$namespace\"}, name)", + "description": "Filter the Virtual Machine by its name", + "error": null, + "hide": 0, + "includeAll": true, + "label": "VM Name", + "multi": true, + "name": "name", + "options": [], + "query": { + "query": "label_values(kubevirt_vm_running_status_last_transition_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}, name)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Virtual Machines Inventory", + "uid": "Q2U8w8qIz", + "version": 1 + } +kind: ConfigMap +metadata: + name: grafana-dashboard-acm-virtual-machines-inventory + namespace: open-cluster-management-observability + annotations: + observability.open-cluster-management.io/dashboard-folder: "ACM / OpenShift Virtualization" diff --git a/operators/multiclusterobservability/manifests/base/grafana/kustomization.yaml b/operators/multiclusterobservability/manifests/base/grafana/kustomization.yaml index 4f1cda4dc9..4746a45ddf 100644 --- a/operators/multiclusterobservability/manifests/base/grafana/kustomization.yaml +++ b/operators/multiclusterobservability/manifests/base/grafana/kustomization.yaml @@ -35,3 +35,5 @@ resources: - dash-k8s-pods-in-namespace-ocp311.yaml - dash-k8s-summary-by-node-ocp311.yaml - dash-acm-openshift-virtualization-overview.yaml +- dash-acm-virtual-machines-inventory.yaml +- dash-acm-virtual-machines-health.yaml