From 42613a460855ce981a88af0e69f4f70d89a60af1 Mon Sep 17 00:00:00 2001 From: adinhodovic Date: Wed, 15 Nov 2023 00:57:50 +0100 Subject: [PATCH 1/2] feat: Add operational dashboard --- config.libsonnet | 1 + dashboards/argo-cd-applications.libsonnet | 36 +- dashboards/argo-cd-notifications.libsonnet | 4 + dashboards/argo-cd-operational.libsonnet | 1210 ++++++++++++++++++++ dashboards/dashboards.libsonnet | 1 + 5 files changed, 1238 insertions(+), 14 deletions(-) create mode 100644 dashboards/argo-cd-operational.libsonnet diff --git a/config.libsonnet b/config.libsonnet index dc68434..fe66781 100644 --- a/config.libsonnet +++ b/config.libsonnet @@ -14,6 +14,7 @@ local annotation = g.dashboard.annotation; grafanaUrl: 'https://grafana.com', argoCdUrl: 'https://argocd.com', + operationalOverviewDashboardUid: 'argo-cd-operational-overview-kask', applicationOverviewDashboardUid: 'argo-cd-application-overview-kask', notificationsOverviewDashboardUid: 'argo-cd-notifications-overview-kask', diff --git a/dashboards/argo-cd-applications.libsonnet b/dashboards/argo-cd-applications.libsonnet index 7efb07f..469a5ac 100644 --- a/dashboards/argo-cd-applications.libsonnet +++ b/dashboards/argo-cd-applications.libsonnet @@ -286,8 +286,9 @@ local tbOverride = tbStandardOptions.override; tablePanel.new( 'Applications Unhealthy', ) + - tbOptions.withSortBy(2) + - tbOptions.sortBy.withDesc(true) + + tbOptions.withSortBy( + tbOptions.sortBy.withDisplayName('Application') + + ) + tbQueryOptions.withTargets( prometheus.new( '$datasource', @@ -307,7 +308,7 @@ local tbOverride = tbStandardOptions.override; dest_server: 'Cluster', project: 'Project', name: 'Application', - health_status: 'Sync Status', + health_status: 'Health Status', }, indexByName: { name: 0, @@ -354,8 +355,9 @@ local tbOverride = tbStandardOptions.override; tablePanel.new( 'Applications Out Of Sync', ) + - tbOptions.withSortBy(2) + - tbOptions.sortBy.withDesc(true) + + tbOptions.withSortBy( + tbOptions.sortBy.withDisplayName('Application') + + ) + tbQueryOptions.withTargets( prometheus.new( '$datasource', @@ -426,8 +428,9 @@ local tbOverride = tbStandardOptions.override; tablePanel.new( 'Applications That Failed to Sync[7d]', ) + - tbOptions.withSortBy(2) + - tbOptions.sortBy.withDesc(true) + + tbOptions.withSortBy( + tbOptions.sortBy.withDisplayName('Application') + + ) + tbQueryOptions.withTargets( prometheus.new( '$datasource', @@ -494,8 +497,9 @@ local tbOverride = tbStandardOptions.override; tablePanel.new( 'Applications With Auto Sync Disabled', ) + - tbOptions.withSortBy(2) + - tbOptions.sortBy.withDesc(true) + + tbOptions.withSortBy( + tbOptions.sortBy.withDisplayName('Application') + + ) + tbQueryOptions.withTargets( prometheus.new( '$datasource', @@ -682,7 +686,11 @@ local tbOverride = tbStandardOptions.override; dashboard.withVariables(variables) + dashboard.withPanels( [ - summaryRow, + summaryRow + + row.gridPos.withX(0) + + row.gridPos.withY(0) + + row.gridPos.withW(24) + + row.gridPos.withH(1), appHealthStatusTimeSeriesPanel + timeSeriesPanel.gridPos.withX(0) + timeSeriesPanel.gridPos.withY(1) + @@ -727,10 +735,10 @@ local tbOverride = tbStandardOptions.override; ) + [ appRow + - timeSeriesPanel.gridPos.withX(0) + - timeSeriesPanel.gridPos.withY(23) + - timeSeriesPanel.gridPos.withW(24) + - timeSeriesPanel.gridPos.withH(1), + row.gridPos.withX(0) + + row.gridPos.withY(23) + + row.gridPos.withW(24) + + row.gridPos.withH(1), ] + grid.makeGrid( diff --git a/dashboards/argo-cd-notifications.libsonnet b/dashboards/argo-cd-notifications.libsonnet index 8c93b6e..effa66e 100644 --- a/dashboards/argo-cd-notifications.libsonnet +++ b/dashboards/argo-cd-notifications.libsonnet @@ -173,6 +173,10 @@ local tsLegend = tsOptions.legend; dashboard.withPanels( [ summaryRow, + row.gridPos.withX(0) + + row.gridPos.withY(0) + + row.gridPos.withW(24) + + row.gridPos.withH(1), ] + grid.makeGrid( [ diff --git a/dashboards/argo-cd-operational.libsonnet b/dashboards/argo-cd-operational.libsonnet new file mode 100644 index 0000000..9ed5b20 --- /dev/null +++ b/dashboards/argo-cd-operational.libsonnet @@ -0,0 +1,1210 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local dashboard = g.dashboard; +local row = g.panel.row; +local grid = g.util.grid; + +local statPanel = g.panel.stat; +local tablePanel = g.panel.table; +local timeSeriesPanel = g.panel.timeSeries; +local heatmapPanel = g.panel.heatmap; + +local variable = dashboard.variable; +local datasource = variable.datasource; +local query = variable.query; +local prometheus = g.query.prometheus; + +// Timeseries +local tsOptions = timeSeriesPanel.options; +local tsStandardOptions = timeSeriesPanel.standardOptions; +local tsQueryOptions = timeSeriesPanel.queryOptions; +local tsFieldConfig = timeSeriesPanel.fieldConfig; +local tsCustom = tsFieldConfig.defaults.custom; +local tsLegend = tsOptions.legend; + +// Table +local tbOptions = tablePanel.options; +local tbStandardOptions = tablePanel.standardOptions; +local tbPanelOptions = tablePanel.panelOptions; +local tbQueryOptions = tablePanel.queryOptions; +local tbFieldConfig = tablePanel.fieldConfig; +local tbCustom = tbFieldConfig.defaults.custom; +local tbOverride = tbStandardOptions.override; + +// HeatmapPanel +local hmOptions = heatmapPanel.options; +local hmStandardOptions = heatmapPanel.standardOptions; +local tbPanelOptions = tablePanel.panelOptions; +local hmQueryOptions = heatmapPanel.queryOptions; +local tbFieldConfig = tablePanel.fieldConfig; +local tbCustom = tbFieldConfig.defaults.custom; +local tbOverride = tbStandardOptions.override; + +{ + grafanaDashboards+:: { + + local datasourceVariable = + datasource.new( + 'datasource', + 'prometheus', + ) + + datasource.generalOptions.withLabel('Data Source'), + + local namespaceVariable = + query.new( + 'namespace', + 'label_values(argocd_app_info{}, namespace)' + ) + + query.withDatasourceFromVariable(datasourceVariable) + + query.withSort(1) + + query.generalOptions.withLabel('Namespace') + + query.selectionOptions.withMulti(true) + + query.selectionOptions.withIncludeAll(true) + + query.refresh.onLoad() + + query.refresh.onTime(), + + local jobVariable = + query.new( + 'job', + 'label_values(job)', + ) + + query.withDatasourceFromVariable(datasourceVariable) + + query.withSort(1) + + query.withRegex('argo.*') + + query.generalOptions.withLabel('Job') + + query.selectionOptions.withMulti(true) + + query.selectionOptions.withIncludeAll(true, '.*') + + query.refresh.onLoad() + + query.refresh.onTime(), + + local clusterVariable = + query.new( + 'cluster', + 'label_values(argocd_app_info{namespace=~"$namespace", job=~"$job"}, dest_server)', + ) + + query.withDatasourceFromVariable(datasourceVariable) + + query.withSort(1) + + query.generalOptions.withLabel('Cluster') + + query.selectionOptions.withMulti(true) + + query.selectionOptions.withIncludeAll(true) + + query.refresh.onLoad() + + query.refresh.onTime(), + + local projectVariable = + query.new( + 'project', + 'label_values(argocd_app_info{namespace=~"$namespace", job=~"$job", dest_server=~"$cluster"}, project)', + ) + + query.withDatasourceFromVariable(datasourceVariable) + + query.withSort(1) + + query.generalOptions.withLabel('Project') + + query.selectionOptions.withMulti(true) + + query.selectionOptions.withIncludeAll(true) + + query.refresh.onLoad() + + query.refresh.onTime(), + + local variables = [ + datasourceVariable, + namespaceVariable, + jobVariable, + clusterVariable, + projectVariable, + ], + + local commonLabels = ||| + namespace=~'$namespace', + job=~'$job', + dest_server=~'$cluster', + project=~'$project', + |||, + + local clustersCountQuery = ||| + sum( + argocd_cluster_info{ + namespace=~'$namespace', + job=~'$job' + } + ) + |||, + + local clustersCountStatPanel = + statPanel.new( + 'Clusters', + ) + + statPanel.queryOptions.withTargets( + prometheus.new( + '$datasource', + clustersCountQuery, + ) + ), + + local repositoriesCountQuery = ||| + count( + count( + argocd_app_info{ + namespace=~'$namespace', + job=~'$job' + } + ) + by (repo) + ) + |||, + + local repositoriesCountStatPanel = + statPanel.new( + 'Repositories', + ) + + statPanel.queryOptions.withTargets( + prometheus.new( + '$datasource', + repositoriesCountQuery, + ) + ), + + local appsCountQuery = ||| + sum( + argocd_app_info{ + %s + } + ) + ||| % commonLabels, + + local appsCountStatPanel = + statPanel.new( + 'Applications', + ) + + statPanel.queryOptions.withTargets( + prometheus.new( + '$datasource', + appsCountQuery, + ) + ), + + local appsQuery = ||| + sum( + argocd_app_info{ + %s + } + ) by (job, dest_server, project, name, health_status, sync_status) + ||| % commonLabels, + + local appsTablePanel = + tablePanel.new( + 'Applications', + ) + + tbOptions.withSortBy( + tbOptions.sortBy.withDisplayName('Application') + ) + + tbQueryOptions.withTargets( + prometheus.new( + '$datasource', + appsQuery, + ) + + prometheus.withFormat('table') + + prometheus.withInstant(true) + ) + + tbQueryOptions.withTransformations([ + tbQueryOptions.transformation.withId( + 'organize' + ) + + tbQueryOptions.transformation.withOptions( + { + renameByName: { + job: 'Job', + dest_server: 'Cluster', + project: 'Project', + name: 'Application', + health_status: 'Health Status', + sync_status: 'Sync Status', + }, + indexByName: { + name: 0, + project: 1, + health_status: 2, + sync_status: 3, + }, + excludeByName: { + Time: true, + job: true, + dest_server: true, + Value: true, + }, + } + ), + ]) + + tbStandardOptions.withOverrides([ + tbOverride.byName.new('name') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.withLinks( + tbPanelOptions.link.withTitle('Go To Application') + + tbPanelOptions.link.withType('dashboard') + + tbPanelOptions.link.withUrl( + '/d/%s/argocd-notifications-overview?&var-project=${__data.fields.Project}&var-application=${__value.raw}' % $._config.applicationOverviewDashboardUid + ) + ) + ), + ]), + + local syncActivityQuery = ||| + sum( + round( + increase( + argocd_app_sync_total{ + %s + }[$__rate_interval] + ) + ) + ) by (job, dest_server, project, name) + ||| % commonLabels, + + local syncActivityTimeSeriesPanel = + timeSeriesPanel.new( + 'Sync Activity', + ) + + tsQueryOptions.withTargets( + prometheus.new( + '$datasource', + syncActivityQuery, + ) + + prometheus.withLegendFormat( + '{{ dest_server }}/{{ project }}/{{ name }}' + ) + ) + + tsStandardOptions.withUnit('short') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsLegend.withDisplayMode('table') + + tsLegend.withPlacement('right') + + tsLegend.withCalcs(['last']) + + tsLegend.withSortBy('Last') + + tsLegend.withSortDesc(true) + + tsCustom.withFillOpacity(10), + + local syncFailuresQuery = ||| + sum( + round( + increase( + argocd_app_sync_total{ + %s + phase=~"Error|Failed" + }[$__rate_interval] + ) + ) + ) by (job, dest_server, project, application, phase) + ||| % commonLabels, + + local syncFailuresTimeSeriesPanel = + timeSeriesPanel.new( + 'Sync Failures', + ) + + tsQueryOptions.withTargets( + prometheus.new( + '$datasource', + syncFailuresQuery, + ) + + prometheus.withLegendFormat( + '{{ dest_server }}/{{ project }}/{{ application }} - {{ phase }}' + ) + ) + + tsStandardOptions.withUnit('short') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsLegend.withDisplayMode('table') + + tsLegend.withPlacement('right') + + tsLegend.withCalcs(['last']) + + tsLegend.withSortBy('Last') + + tsLegend.withSortDesc(true) + + tsCustom.withFillOpacity(10), + + local reconcilationActivityQuery = ||| + sum( + round( + increase( + argocd_app_reconcile_count{ + namespace=~'$namespace', + job=~'$job', + dest_server=~'$cluster' + }[$__rate_interval] + ) + ) + ) by (namespace, job, dest_server) + |||, + + local reconcilationActivtyTimeSeriesPanel = + timeSeriesPanel.new( + 'Recociliation Activity', + ) + + tsQueryOptions.withTargets( + prometheus.new( + '$datasource', + reconcilationActivityQuery, + ) + + prometheus.withLegendFormat( + '{{ namespace }}/{{ dest_server }}' + ) + ) + + tsStandardOptions.withUnit('short') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsLegend.withDisplayMode('table') + + tsLegend.withPlacement('right') + + tsLegend.withCalcs(['last']) + + tsLegend.withSortBy('Last') + + tsLegend.withSortDesc(true) + + tsCustom.withFillOpacity(10), + + local reconcilationPerformanceQuery = ||| + sum( + increase( + argocd_app_reconcile_bucket{ + namespace=~'$namespace', + job=~'$job', + dest_server=~'$cluster' + }[$__rate_interval] + ) + ) by (le) + |||, + + local reconcilationPerformanceHeatmapPanel = + heatmapPanel.new( + 'Reconciliation Performance', + ) + + hmQueryOptions.withTargets( + prometheus.new( + '$datasource', + reconcilationPerformanceQuery, + ) + + prometheus.withLegendFormat( + '{{ le }}' + ) + + prometheus.withFormat('heatmap') + ) + + hmStandardOptions.withUnit('short'), + + local k8sApiActivityQuery = ||| + sum( + round( + increase( + argocd_app_k8s_request_total{ + namespace=~'$namespace', + job=~'$job', + project=~'$project' + }[$__rate_interval] + ) + ) + ) by (job, server, project, verb, resource_kind) + |||, + + local k8sApiActivityTimeSeriesPanel = + timeSeriesPanel.new( + 'K8s API Activity', + ) + + tsQueryOptions.withTargets( + prometheus.new( + '$datasource', + k8sApiActivityQuery, + ) + + prometheus.withLegendFormat( + '{{ server }}/{{ project }} - {{ verb }}/{{ resource_kind }}' + ) + ) + + tsStandardOptions.withUnit('short') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsLegend.withDisplayMode('table') + + tsLegend.withPlacement('right') + + tsLegend.withCalcs(['last']) + + tsLegend.withSortBy('Last') + + tsLegend.withSortDesc(true) + + tsCustom.withFillOpacity(10), + + local pendingKubectlRunQuery = ||| + sum( + argocd_kubectl_exec_pending{ + namespace=~'$namespace', + job=~'$job' + } + ) by (job, command) + |||, + + local pendingKubectlTimeSeriesPanel = + timeSeriesPanel.new( + 'Pending Kubectl Runs', + ) + + tsQueryOptions.withTargets( + prometheus.new( + '$datasource', + pendingKubectlRunQuery, + ) + + prometheus.withLegendFormat( + '{{ dest_server }} - {{ command }}' + ) + ) + + tsStandardOptions.withUnit('short') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsLegend.withDisplayMode('table') + + tsLegend.withPlacement('right') + + tsLegend.withCalcs(['last']) + + tsLegend.withSortBy('Last') + + tsLegend.withSortDesc(true) + + tsCustom.withFillOpacity(10), + + local resourceObjectsQuery = ||| + sum( + argocd_cluster_api_resource_objects{ + namespace=~'$namespace', + job=~'$job', + server=~'$cluster' + } + ) by (namespace, job, server) + |||, + + local resourceObjectsTimeSeriesPanel = + timeSeriesPanel.new( + 'Resource Objects', + ) + + tsQueryOptions.withTargets( + prometheus.new( + '$datasource', + resourceObjectsQuery, + ) + + prometheus.withLegendFormat( + '{{ server }}' + ) + ) + + tsStandardOptions.withUnit('short') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsLegend.withDisplayMode('table') + + tsLegend.withPlacement('right') + + tsLegend.withCalcs(['last']) + + tsLegend.withSortBy('Last') + + tsLegend.withSortDesc(true) + + tsCustom.withFillOpacity(10), + + local apiResourcesQuery = ||| + sum( + argocd_cluster_api_resources{ + namespace=~'$namespace', + job=~'$job', + server=~'$cluster' + } + ) by (namespace, job, server) + |||, + + local apiResourcesTimeSeriesPanel = + timeSeriesPanel.new( + 'API Resources', + ) + + tsQueryOptions.withTargets( + prometheus.new( + '$datasource', + apiResourcesQuery, + ) + + prometheus.withLegendFormat( + '{{ server }}' + ) + ) + + tsStandardOptions.withUnit('short') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsLegend.withDisplayMode('table') + + tsLegend.withPlacement('right') + + tsLegend.withCalcs(['last']) + + tsLegend.withSortBy('Last') + + tsLegend.withSortDesc(true) + + tsCustom.withFillOpacity(10), + + local clusterEventsQuery = ||| + sum( + increase( + argocd_cluster_events_total{ + namespace=~'$namespace', + job=~'$job', + server=~'$cluster' + }[$__rate_interval] + ) + ) by (namespace, job, server) + |||, + + local clusterEventsTimeSeriesPanel = + timeSeriesPanel.new( + 'Cluster Events', + ) + + tsQueryOptions.withTargets( + prometheus.new( + '$datasource', + clusterEventsQuery, + ) + + prometheus.withLegendFormat( + '{{ server }}' + ) + ) + + tsStandardOptions.withUnit('short') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsLegend.withDisplayMode('table') + + tsLegend.withPlacement('right') + + tsLegend.withCalcs(['last']) + + tsLegend.withSortBy('Last') + + tsLegend.withSortDesc(true) + + tsCustom.withFillOpacity(10), + + local gitRequestsLsRemoteQuery = ||| + sum( + increase( + argocd_git_request_total{ + namespace=~'$namespace', + job=~'$job', + request_type="ls-remote" + }[$__rate_interval] + ) + ) by (namespace, job, repo) + |||, + + local gitRequestsLsRemoteTimeSeriesPanel = + timeSeriesPanel.new( + 'Git Requests (ls-remote)', + ) + + tsQueryOptions.withTargets( + prometheus.new( + '$datasource', + gitRequestsLsRemoteQuery, + ) + + prometheus.withLegendFormat( + '{{ namespace }} - {{ repo }}' + ) + ) + + tsStandardOptions.withUnit('short') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsLegend.withDisplayMode('table') + + tsLegend.withPlacement('right') + + tsLegend.withCalcs(['last']) + + tsLegend.withSortBy('Last') + + tsLegend.withSortDesc(true) + + tsCustom.withFillOpacity(10), + + local gitRequestsCheckoutQuery = ||| + sum( + increase( + argocd_git_request_total{ + namespace=~'$namespace', + job=~'$job', + request_type="fetch" + }[$__rate_interval] + ) + ) by (namespace, job, repo) + |||, + + local gitRequestsCheckoutTimeSeriesPanel = + timeSeriesPanel.new( + 'Git Requests (checkout)', + ) + + tsQueryOptions.withTargets( + prometheus.new( + '$datasource', + gitRequestsCheckoutQuery, + ) + + prometheus.withLegendFormat( + '{{ namespace }} - {{ repo }}' + ) + ) + + tsStandardOptions.withUnit('short') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsLegend.withDisplayMode('table') + + tsLegend.withPlacement('right') + + tsLegend.withCalcs(['last']) + + tsLegend.withSortBy('Last') + + tsLegend.withSortDesc(true) + + tsCustom.withFillOpacity(10), + + local gitFetchPerformanceQuery = ||| + sum( + increase( + argocd_git_request_duration_seconds_bucket{ + namespace=~'$namespace', + job=~'$job', + request_type="fetch" + }[$__rate_interval] + ) + ) by (le) + |||, + + local gitFetchPerformanceHeatmapPanel = + heatmapPanel.new( + 'Git Fetch Performance', + ) + + hmQueryOptions.withTargets( + prometheus.new( + '$datasource', + gitFetchPerformanceQuery, + ) + + prometheus.withLegendFormat( + '{{ le }}' + ) + + prometheus.withFormat('heatmap') + ) + + hmStandardOptions.withUnit('short'), + + local gitLsRemotePerformanceQuery = ||| + sum( + increase( + argocd_git_request_duration_seconds_bucket{ + namespace=~'$namespace', + job=~'$job', + request_type="ls-remote" + }[$__rate_interval] + ) + ) by (le) + |||, + + local gitLsRemotePerformanceHeatmapPanel = + heatmapPanel.new( + 'Git Ls-remote Performance', + ) + + hmQueryOptions.withTargets( + prometheus.new( + '$datasource', + gitLsRemotePerformanceQuery, + ) + + prometheus.withLegendFormat( + '{{ le }}' + ) + + prometheus.withFormat('heatmap') + ) + + hmStandardOptions.withUnit('short'), + + local appsDefined = std.length($._config.applications) != 0, + local appBadgeContent = [ + '| %(name)s | %(environment)s | [![App Status](%(baseUrl)s/api/badge?name=%(applicationName)s&revision=true)](%(baseUrl)s/applications/%(applicationName)s) |' % application { + baseUrl: if std.objectHas(application, 'baseUrl') then application.baseUrl else $._config.argoCdUrl, + applicationName: if std.objectHas(application, 'applicationName') then application.applicationName else application.name, + } + for application in $._config.applications + ], + + local appUnhealthyQuery = ||| + sum( + argocd_app_info{ + %s + health_status!~"Healthy|Progressing" + } + ) by (job, dest_server, project, name, health_status) + ||| % commonLabels, + + local appUnhealthyTablePanel = + tablePanel.new( + 'Applications Unhealthy', + ) + + tbOptions.withSortBy(2) + + tbOptions.sortBy.withDesc(true) + + tbQueryOptions.withTargets( + prometheus.new( + '$datasource', + appUnhealthyQuery, + ) + + prometheus.withFormat('table') + + prometheus.withInstant(true) + ) + + tbQueryOptions.withTransformations([ + tbQueryOptions.transformation.withId( + 'organize' + ) + + tbQueryOptions.transformation.withOptions( + { + renameByName: { + job: 'Job', + dest_server: 'Cluster', + project: 'Project', + name: 'Application', + health_status: 'Sync Status', + }, + indexByName: { + name: 0, + project: 1, + health_status: 2, + }, + excludeByName: { + Time: true, + job: true, + dest_server: true, + Value: true, + }, + } + ), + ]) + + tbStandardOptions.withOverrides([ + tbOverride.byName.new('name') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.withLinks( + tbPanelOptions.link.withTitle('Go To Application') + + tbPanelOptions.link.withUrl( + $._config.argoCdUrl + '/applications/${__data.fields.Project}/${__value.raw}' + ) + ) + ), + tbOverride.byName.new('health_status') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.color.withMode('fixed') + + tbStandardOptions.color.withFixedColor('yellow') + + tbCustom.withDisplayMode('color-background') + ), + ]), + + local appOutOfSyncQuery = ||| + sum( + argocd_app_info{ + %s + sync_status!="Synced" + } + ) by (job, dest_server, project, name, sync_status) > 0 + ||| % commonLabels, + + local appOutOfSyncTablePanel = + tablePanel.new( + 'Applications Out Of Sync', + ) + + tbOptions.withSortBy(2) + + tbOptions.sortBy.withDesc(true) + + tbQueryOptions.withTargets( + prometheus.new( + '$datasource', + appOutOfSyncQuery, + ) + + prometheus.withFormat('table') + + prometheus.withInstant(true) + ) + + tbQueryOptions.withTransformations([ + tbQueryOptions.transformation.withId( + 'organize' + ) + + tbQueryOptions.transformation.withOptions( + { + renameByName: { + job: 'Job', + dest_server: 'Cluster', + project: 'Project', + name: 'Application', + sync_status: 'Sync Status', + }, + indexByName: { + name: 0, + project: 1, + sync_status: 2, + }, + excludeByName: { + Time: true, + job: true, + dest_server: true, + Value: true, + }, + } + ), + ]) + + tbStandardOptions.withOverrides([ + tbOverride.byName.new('name') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.withLinks( + tbPanelOptions.link.withTitle('Go To Application') + + tbPanelOptions.link.withUrl( + $._config.argoCdUrl + '/applications/${__data.fields.Project}/${__value.raw}' + ) + ) + ), + tbOverride.byName.new('sync_status') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.color.withMode('fixed') + + tbStandardOptions.color.withFixedColor('yellow') + + tbCustom.withDisplayMode('color-background') + ), + ]), + + local appSync7dQuery = ||| + sum( + round( + increase( + argocd_app_sync_total{ + %s + phase!="Succeeded" + }[7d] + ) + ) + ) by (job, dest_server, project, name, phase) > 0 + ||| % commonLabels, + + local appSync7dTablePanel = + tablePanel.new( + 'Applications That Failed to Sync[7d]', + ) + + tbOptions.withSortBy(2) + + tbOptions.sortBy.withDesc(true) + + tbQueryOptions.withTargets( + prometheus.new( + '$datasource', + appSync7dQuery, + ) + + prometheus.withFormat('table') + + prometheus.withInstant(true) + ) + + tbQueryOptions.withTransformations([ + tbQueryOptions.transformation.withId( + 'organize' + ) + + tbQueryOptions.transformation.withOptions( + { + renameByName: { + job: 'Job', + dest_server: 'Cluster', + project: 'Project', + name: 'Application', + phase: 'Phase', + Value: 'Count', + }, + indexByName: { + name: 0, + project: 1, + phase: 2, + }, + excludeByName: { + Time: true, + job: true, + dest_server: true, + }, + } + ), + ]) + + tbStandardOptions.withOverrides([ + tbOverride.byName.new('name') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.withLinks( + tbPanelOptions.link.withTitle('Go To Application') + + tbPanelOptions.link.withUrl( + $._config.argoCdUrl + '/applications/${__data.fields.Project}/${__value.raw}' + ) + ) + ), + tbOverride.byName.new('Value') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.color.withMode('fixed') + + tbStandardOptions.color.withFixedColor('yellow') + + tbCustom.withDisplayMode('color-background') + ), + ]), + + local appAutoSyncDisabledQuery = ||| + sum( + argocd_app_info{ + %s + autosync_enabled!="true" + } + ) by (job, dest_server, project, name, autosync_enabled) > 0 + ||| % commonLabels, + + local appAutoSyncDisabledTablePanel = + tablePanel.new( + 'Applications With Auto Sync Disabled', + ) + + tbOptions.withSortBy(2) + + tbOptions.sortBy.withDesc(true) + + tbQueryOptions.withTargets( + prometheus.new( + '$datasource', + appAutoSyncDisabledQuery, + ) + + prometheus.withFormat('table') + + prometheus.withInstant(true) + ) + + tbQueryOptions.withTransformations([ + tbQueryOptions.transformation.withId( + 'organize' + ) + + tbQueryOptions.transformation.withOptions( + { + renameByName: { + job: 'Job', + dest_server: 'Cluster', + project: 'Project', + name: 'Application', + autosync_enabled: 'Auto Sync Enabled', + }, + indexByName: { + name: 0, + project: 1, + autosync_enabled: 2, + }, + excludeByName: { + Time: true, + job: true, + dest_server: true, + Value: true, + }, + } + ), + ]) + + tbStandardOptions.withOverrides([ + tbOverride.byName.new('name') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.withLinks( + tbPanelOptions.link.withTitle('Go To Application') + + tbPanelOptions.link.withUrl( + $._config.argoCdUrl + '/applications/${__data.fields.Project}/${__value.raw}' + ) + ) + ), + tbOverride.byName.new('autosync_enabled') + + tbOverride.byName.withPropertiesFromOptions( + tbStandardOptions.color.withMode('fixed') + + tbStandardOptions.color.withFixedColor('yellow') + + tbCustom.withDisplayMode('color-background') + ), + ]), + + local appHealthStatusByAppQuery = ||| + sum( + argocd_app_info{ + %s + name=~"$application", + } + ) by (namespace, job, dest_server, project, name, health_status) + ||| % commonLabels, + + local appHealthStatusByAppTimeSeriesPanel = + timeSeriesPanel.new( + 'Application Health Status', + ) + + tsQueryOptions.withTargets( + prometheus.new( + '$datasource', + appHealthStatusByAppQuery, + ) + + prometheus.withLegendFormat( + '{{ dest_server }}/{{ project }}/{{ name }} - {{ health_status }}' + ) + ) + + tsQueryOptions.withInterval('5m') + + tsStandardOptions.withUnit('short') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsLegend.withDisplayMode('table') + + tsLegend.withCalcs(['last']) + + tsLegend.withSortBy('Last') + + tsLegend.withSortDesc(true) + + tsCustom.withFillOpacity(10), + + local appSyncStatusByAppQuery = ||| + sum( + argocd_app_info{ + %s + name=~"$application", + } + ) by (namespace, job, dest_server, project, name, sync_status) + ||| % commonLabels, + + local appSyncStatusByAppTimeSeriesPanel = + timeSeriesPanel.new( + 'Application Sync Status', + ) + + tsQueryOptions.withTargets( + prometheus.new( + '$datasource', + appSyncStatusByAppQuery, + ) + + prometheus.withLegendFormat( + '{{ dest_server }}/{{ project }}/{{ name }} - {{ sync_status }}' + ) + ) + + tsQueryOptions.withInterval('5m') + + tsStandardOptions.withUnit('short') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsLegend.withDisplayMode('table') + + tsLegend.withCalcs(['last']) + + tsLegend.withSortBy('Last') + + tsLegend.withSortDesc(true) + + tsCustom.withFillOpacity(10), + + local appSyncByAppQuery = ||| + sum( + round( + increase( + argocd_app_sync_total{ + %s + name=~"$application", + }[$__rate_interval] + ) + ) + ) by (namespace, job, dest_server, project, name, phase) + ||| % commonLabels, + + local appSyncByAppTimeSeriesPanel = + timeSeriesPanel.new( + 'Application Sync Result', + ) + + tsQueryOptions.withTargets( + prometheus.new( + '$datasource', + appSyncByAppQuery, + ) + + prometheus.withLegendFormat( + '{{ dest_server }}/{{ project }}/{{ name }} - {{ phase }}' + ) + ) + + tsQueryOptions.withInterval('5m') + + tsStandardOptions.withUnit('short') + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc') + + tsLegend.withShowLegend(true) + + tsLegend.withDisplayMode('table') + + tsLegend.withCalcs(['last']) + + tsLegend.withSortBy('Last') + + tsLegend.withSortDesc(true) + + tsCustom.withFillOpacity(10), + + local summaryRow = + row.new( + 'Summary' + ), + + local syncStatsRow = + row.new( + 'Sync Stats' + ), + + local controllerStatsRow = + row.new( + 'Controller Stats' + ), + + local clusterStatsRow = + row.new( + 'Cluster Stats' + ), + + local repoServerStatsRow = + row.new( + 'Repo Server Stats', + ), + + 'argo-cd-operational-overview.json': + $._config.bypassDashboardValidation + + dashboard.new( + 'ArgoCD / Operational / Overview', + ) + + dashboard.withDescription('A dashboard that monitors ArgoCD with a focus on the operational. It is created using the [argo-cd-mixin](https://github.com/adinhodovic/argo-cd-mixin).') + + dashboard.withUid($._config.operationalOverviewDashboardUid) + + dashboard.withTags($._config.tags) + + dashboard.withTimezone('utc') + + dashboard.withEditable(true) + + dashboard.time.withFrom('now-6h') + + dashboard.time.withTo('now') + + dashboard.withVariables(variables) + + dashboard.withPanels( + [ + summaryRow + + row.gridPos.withX(0) + + row.gridPos.withY(0) + + row.gridPos.withW(24) + + row.gridPos.withH(1), + clustersCountStatPanel + + tablePanel.gridPos.withX(0) + + tablePanel.gridPos.withY(1) + + tablePanel.gridPos.withW(6) + + tablePanel.gridPos.withH(4), + repositoriesCountStatPanel + + tablePanel.gridPos.withX(6) + + tablePanel.gridPos.withY(1) + + tablePanel.gridPos.withW(6) + + tablePanel.gridPos.withH(4), + appsCountStatPanel + + tablePanel.gridPos.withX(0) + + tablePanel.gridPos.withY(5) + + tablePanel.gridPos.withW(6) + + tablePanel.gridPos.withH(4), + appsTablePanel + + tablePanel.gridPos.withX(12) + + tablePanel.gridPos.withY(1) + + tablePanel.gridPos.withW(12) + + tablePanel.gridPos.withH(8), + ] + + [ + syncStatsRow + + row.gridPos.withX(0) + + row.gridPos.withY(9) + + row.gridPos.withW(24) + + row.gridPos.withH(1), + ] + + grid.makeGrid( + [syncActivityTimeSeriesPanel, syncFailuresTimeSeriesPanel], + panelWidth=12, + panelHeight=6, + startY=10 + ) + + [ + controllerStatsRow + + row.gridPos.withX(0) + + row.gridPos.withY(16) + + row.gridPos.withW(24) + + row.gridPos.withH(1), + ] + + grid.makeGrid( + [ + reconcilationActivtyTimeSeriesPanel, + reconcilationPerformanceHeatmapPanel, + k8sApiActivityTimeSeriesPanel, + pendingKubectlTimeSeriesPanel, + ], + panelWidth=12, + panelHeight=6, + startY=17 + ) + + [ + clusterStatsRow + + row.gridPos.withX(0) + + row.gridPos.withY(29) + + row.gridPos.withW(24) + + row.gridPos.withH(1), + ] + + grid.makeGrid( + [resourceObjectsTimeSeriesPanel, apiResourcesTimeSeriesPanel, clusterEventsTimeSeriesPanel], + panelWidth=8, + panelHeight=6, + startY=30 + ) + + [ + repoServerStatsRow + + row.gridPos.withX(0) + + row.gridPos.withY(36) + + row.gridPos.withW(24) + + row.gridPos.withH(1), + ] + + grid.makeGrid( + [ + gitRequestsLsRemoteTimeSeriesPanel, + gitRequestsCheckoutTimeSeriesPanel, + gitFetchPerformanceHeatmapPanel, + gitLsRemotePerformanceHeatmapPanel, + ], + panelWidth=12, + panelHeight=6, + startY=37 + ) + ) + + if $._config.annotation.enabled then + dashboard.withAnnotations($._config.customAnnotation) + else {}, + }, +} diff --git a/dashboards/dashboards.libsonnet b/dashboards/dashboards.libsonnet index 5a54c7a..72ec9e6 100644 --- a/dashboards/dashboards.libsonnet +++ b/dashboards/dashboards.libsonnet @@ -1,3 +1,4 @@ +(import 'argo-cd-operational.libsonnet') + (import 'argo-cd-applications.libsonnet') + (import 'argo-cd-notifications.libsonnet') + {} From 3e180a071ce917de959222c71ac065ec3152f9f0 Mon Sep 17 00:00:00 2001 From: adinhodovic Date: Wed, 15 Nov 2023 11:29:20 +0100 Subject: [PATCH 2/2] feat: Add operational dashboard --- dashboards/argo-cd-applications.libsonnet | 8 +- dashboards/argo-cd-notifications.libsonnet | 2 +- dashboards/argo-cd-operational.libsonnet | 530 ++------ .../argo-cd-application-overview.json | 28 +- .../argo-cd-notifications-overview.json | 6 + .../argo-cd-operational-overview.json | 1081 +++++++++++++++++ 6 files changed, 1233 insertions(+), 422 deletions(-) create mode 100644 dashboards_out/argo-cd-operational-overview.json diff --git a/dashboards/argo-cd-applications.libsonnet b/dashboards/argo-cd-applications.libsonnet index 469a5ac..d35cafb 100644 --- a/dashboards/argo-cd-applications.libsonnet +++ b/dashboards/argo-cd-applications.libsonnet @@ -287,7 +287,7 @@ local tbOverride = tbStandardOptions.override; 'Applications Unhealthy', ) + tbOptions.withSortBy( - tbOptions.sortBy.withDisplayName('Application') + + tbOptions.sortBy.withDisplayName('Application') ) + tbQueryOptions.withTargets( prometheus.new( @@ -356,7 +356,7 @@ local tbOverride = tbStandardOptions.override; 'Applications Out Of Sync', ) + tbOptions.withSortBy( - tbOptions.sortBy.withDisplayName('Application') + + tbOptions.sortBy.withDisplayName('Application') ) + tbQueryOptions.withTargets( prometheus.new( @@ -429,7 +429,7 @@ local tbOverride = tbStandardOptions.override; 'Applications That Failed to Sync[7d]', ) + tbOptions.withSortBy( - tbOptions.sortBy.withDisplayName('Application') + + tbOptions.sortBy.withDisplayName('Application') ) + tbQueryOptions.withTargets( prometheus.new( @@ -498,7 +498,7 @@ local tbOverride = tbStandardOptions.override; 'Applications With Auto Sync Disabled', ) + tbOptions.withSortBy( - tbOptions.sortBy.withDisplayName('Application') + + tbOptions.sortBy.withDisplayName('Application') ) + tbQueryOptions.withTargets( prometheus.new( diff --git a/dashboards/argo-cd-notifications.libsonnet b/dashboards/argo-cd-notifications.libsonnet index effa66e..06d15af 100644 --- a/dashboards/argo-cd-notifications.libsonnet +++ b/dashboards/argo-cd-notifications.libsonnet @@ -172,7 +172,7 @@ local tsLegend = tsOptions.legend; dashboard.withVariables(variables) + dashboard.withPanels( [ - summaryRow, + summaryRow + row.gridPos.withX(0) + row.gridPos.withY(0) + row.gridPos.withW(24) + diff --git a/dashboards/argo-cd-operational.libsonnet b/dashboards/argo-cd-operational.libsonnet index 9ed5b20..fdec9c7 100644 --- a/dashboards/argo-cd-operational.libsonnet +++ b/dashboards/argo-cd-operational.libsonnet @@ -4,6 +4,7 @@ local row = g.panel.row; local grid = g.util.grid; local statPanel = g.panel.stat; +local pieChartPanel = g.panel.pieChart; local tablePanel = g.panel.table; local timeSeriesPanel = g.panel.timeSeries; local heatmapPanel = g.panel.heatmap; @@ -13,6 +14,12 @@ local datasource = variable.datasource; local query = variable.query; local prometheus = g.query.prometheus; +// Pie Chart +local pcOptions = pieChartPanel.options; +local pcStandardOptions = pieChartPanel.standardOptions; +local pcOverride = pcStandardOptions.override; +local pcLegend = pcOptions.legend; + // Timeseries local tsOptions = timeSeriesPanel.options; local tsStandardOptions = timeSeriesPanel.standardOptions; @@ -24,19 +31,12 @@ local tsLegend = tsOptions.legend; // Table local tbOptions = tablePanel.options; local tbStandardOptions = tablePanel.standardOptions; -local tbPanelOptions = tablePanel.panelOptions; local tbQueryOptions = tablePanel.queryOptions; -local tbFieldConfig = tablePanel.fieldConfig; -local tbCustom = tbFieldConfig.defaults.custom; -local tbOverride = tbStandardOptions.override; // HeatmapPanel -local hmOptions = heatmapPanel.options; local hmStandardOptions = heatmapPanel.standardOptions; local tbPanelOptions = tablePanel.panelOptions; local hmQueryOptions = heatmapPanel.queryOptions; -local tbFieldConfig = tablePanel.fieldConfig; -local tbCustom = tbFieldConfig.defaults.custom; local tbOverride = tbStandardOptions.override; { @@ -179,6 +179,98 @@ local tbOverride = tbStandardOptions.override; ) ), + local healthStatusQuery = ||| + sum( + argocd_app_info{ + %s + } + ) by (health_status) + ||| % commonLabels, + + local healthStatusPieChartPanel = + pieChartPanel.new( + 'Health Status', + ) + + pieChartPanel.queryOptions.withTargets( + prometheus.new( + '$datasource', + healthStatusQuery, + ) + + prometheus.withInstant(true) + + prometheus.withLegendFormat( + '{{ health_status }}' + ) + ) + + pcStandardOptions.withUnit('short') + + pcOptions.tooltip.withMode('multi') + + pcLegend.withShowLegend(true) + + pcLegend.withDisplayMode('table') + + pcLegend.withPlacement('right') + + pcLegend.withValues(['value']) + + pcStandardOptions.withOverrides([ + pcOverride.byName.new('Healthy') + + pcOverride.byName.withPropertiesFromOptions( + pcStandardOptions.color.withMode('fixed') + + pcStandardOptions.color.withFixedColor('green') + ), + pcOverride.byName.new('Degraded') + + pcOverride.byName.withPropertiesFromOptions( + pcStandardOptions.color.withMode('fixed') + + pcStandardOptions.color.withFixedColor('red') + ), + pcOverride.byName.new('Progressing') + + pcOverride.byName.withPropertiesFromOptions( + pcStandardOptions.color.withMode('fixed') + + pcStandardOptions.color.withFixedColor('yellow') + ), + ]), + + local syncStatusQuery = ||| + sum( + argocd_app_info{ + %s + } + ) by (sync_status) + ||| % commonLabels, + + local syncStatusPieChartPanel = + pieChartPanel.new( + 'Sync Status', + ) + + pieChartPanel.queryOptions.withTargets( + prometheus.new( + '$datasource', + syncStatusQuery, + ) + + prometheus.withInstant(true) + + prometheus.withLegendFormat( + '{{ sync_status }}' + ) + ) + + pcStandardOptions.withUnit('short') + + pcOptions.tooltip.withMode('multi') + + pcLegend.withShowLegend(true) + + pcLegend.withDisplayMode('table') + + pcLegend.withPlacement('right') + + pcLegend.withValues(['value']) + + pcStandardOptions.withOverrides([ + pcOverride.byName.new('Synced') + + pcOverride.byName.withPropertiesFromOptions( + pcStandardOptions.color.withMode('fixed') + + pcStandardOptions.color.withFixedColor('green') + ), + pcOverride.byName.new('OutOfSync') + + pcOverride.byName.withPropertiesFromOptions( + pcStandardOptions.color.withMode('fixed') + + pcStandardOptions.color.withFixedColor('red') + ), + pcOverride.byName.new('Unknown') + + pcOverride.byName.withPropertiesFromOptions( + pcStandardOptions.color.withMode('fixed') + + pcStandardOptions.color.withFixedColor('yellow') + ), + ]), + local appsQuery = ||| sum( argocd_app_info{ @@ -686,394 +778,6 @@ local tbOverride = tbStandardOptions.override; ) + hmStandardOptions.withUnit('short'), - local appsDefined = std.length($._config.applications) != 0, - local appBadgeContent = [ - '| %(name)s | %(environment)s | [![App Status](%(baseUrl)s/api/badge?name=%(applicationName)s&revision=true)](%(baseUrl)s/applications/%(applicationName)s) |' % application { - baseUrl: if std.objectHas(application, 'baseUrl') then application.baseUrl else $._config.argoCdUrl, - applicationName: if std.objectHas(application, 'applicationName') then application.applicationName else application.name, - } - for application in $._config.applications - ], - - local appUnhealthyQuery = ||| - sum( - argocd_app_info{ - %s - health_status!~"Healthy|Progressing" - } - ) by (job, dest_server, project, name, health_status) - ||| % commonLabels, - - local appUnhealthyTablePanel = - tablePanel.new( - 'Applications Unhealthy', - ) + - tbOptions.withSortBy(2) + - tbOptions.sortBy.withDesc(true) + - tbQueryOptions.withTargets( - prometheus.new( - '$datasource', - appUnhealthyQuery, - ) + - prometheus.withFormat('table') + - prometheus.withInstant(true) - ) + - tbQueryOptions.withTransformations([ - tbQueryOptions.transformation.withId( - 'organize' - ) + - tbQueryOptions.transformation.withOptions( - { - renameByName: { - job: 'Job', - dest_server: 'Cluster', - project: 'Project', - name: 'Application', - health_status: 'Sync Status', - }, - indexByName: { - name: 0, - project: 1, - health_status: 2, - }, - excludeByName: { - Time: true, - job: true, - dest_server: true, - Value: true, - }, - } - ), - ]) + - tbStandardOptions.withOverrides([ - tbOverride.byName.new('name') + - tbOverride.byName.withPropertiesFromOptions( - tbStandardOptions.withLinks( - tbPanelOptions.link.withTitle('Go To Application') + - tbPanelOptions.link.withUrl( - $._config.argoCdUrl + '/applications/${__data.fields.Project}/${__value.raw}' - ) - ) - ), - tbOverride.byName.new('health_status') + - tbOverride.byName.withPropertiesFromOptions( - tbStandardOptions.color.withMode('fixed') + - tbStandardOptions.color.withFixedColor('yellow') + - tbCustom.withDisplayMode('color-background') - ), - ]), - - local appOutOfSyncQuery = ||| - sum( - argocd_app_info{ - %s - sync_status!="Synced" - } - ) by (job, dest_server, project, name, sync_status) > 0 - ||| % commonLabels, - - local appOutOfSyncTablePanel = - tablePanel.new( - 'Applications Out Of Sync', - ) + - tbOptions.withSortBy(2) + - tbOptions.sortBy.withDesc(true) + - tbQueryOptions.withTargets( - prometheus.new( - '$datasource', - appOutOfSyncQuery, - ) + - prometheus.withFormat('table') + - prometheus.withInstant(true) - ) + - tbQueryOptions.withTransformations([ - tbQueryOptions.transformation.withId( - 'organize' - ) + - tbQueryOptions.transformation.withOptions( - { - renameByName: { - job: 'Job', - dest_server: 'Cluster', - project: 'Project', - name: 'Application', - sync_status: 'Sync Status', - }, - indexByName: { - name: 0, - project: 1, - sync_status: 2, - }, - excludeByName: { - Time: true, - job: true, - dest_server: true, - Value: true, - }, - } - ), - ]) + - tbStandardOptions.withOverrides([ - tbOverride.byName.new('name') + - tbOverride.byName.withPropertiesFromOptions( - tbStandardOptions.withLinks( - tbPanelOptions.link.withTitle('Go To Application') + - tbPanelOptions.link.withUrl( - $._config.argoCdUrl + '/applications/${__data.fields.Project}/${__value.raw}' - ) - ) - ), - tbOverride.byName.new('sync_status') + - tbOverride.byName.withPropertiesFromOptions( - tbStandardOptions.color.withMode('fixed') + - tbStandardOptions.color.withFixedColor('yellow') + - tbCustom.withDisplayMode('color-background') - ), - ]), - - local appSync7dQuery = ||| - sum( - round( - increase( - argocd_app_sync_total{ - %s - phase!="Succeeded" - }[7d] - ) - ) - ) by (job, dest_server, project, name, phase) > 0 - ||| % commonLabels, - - local appSync7dTablePanel = - tablePanel.new( - 'Applications That Failed to Sync[7d]', - ) + - tbOptions.withSortBy(2) + - tbOptions.sortBy.withDesc(true) + - tbQueryOptions.withTargets( - prometheus.new( - '$datasource', - appSync7dQuery, - ) + - prometheus.withFormat('table') + - prometheus.withInstant(true) - ) + - tbQueryOptions.withTransformations([ - tbQueryOptions.transformation.withId( - 'organize' - ) + - tbQueryOptions.transformation.withOptions( - { - renameByName: { - job: 'Job', - dest_server: 'Cluster', - project: 'Project', - name: 'Application', - phase: 'Phase', - Value: 'Count', - }, - indexByName: { - name: 0, - project: 1, - phase: 2, - }, - excludeByName: { - Time: true, - job: true, - dest_server: true, - }, - } - ), - ]) + - tbStandardOptions.withOverrides([ - tbOverride.byName.new('name') + - tbOverride.byName.withPropertiesFromOptions( - tbStandardOptions.withLinks( - tbPanelOptions.link.withTitle('Go To Application') + - tbPanelOptions.link.withUrl( - $._config.argoCdUrl + '/applications/${__data.fields.Project}/${__value.raw}' - ) - ) - ), - tbOverride.byName.new('Value') + - tbOverride.byName.withPropertiesFromOptions( - tbStandardOptions.color.withMode('fixed') + - tbStandardOptions.color.withFixedColor('yellow') + - tbCustom.withDisplayMode('color-background') - ), - ]), - - local appAutoSyncDisabledQuery = ||| - sum( - argocd_app_info{ - %s - autosync_enabled!="true" - } - ) by (job, dest_server, project, name, autosync_enabled) > 0 - ||| % commonLabels, - - local appAutoSyncDisabledTablePanel = - tablePanel.new( - 'Applications With Auto Sync Disabled', - ) + - tbOptions.withSortBy(2) + - tbOptions.sortBy.withDesc(true) + - tbQueryOptions.withTargets( - prometheus.new( - '$datasource', - appAutoSyncDisabledQuery, - ) + - prometheus.withFormat('table') + - prometheus.withInstant(true) - ) + - tbQueryOptions.withTransformations([ - tbQueryOptions.transformation.withId( - 'organize' - ) + - tbQueryOptions.transformation.withOptions( - { - renameByName: { - job: 'Job', - dest_server: 'Cluster', - project: 'Project', - name: 'Application', - autosync_enabled: 'Auto Sync Enabled', - }, - indexByName: { - name: 0, - project: 1, - autosync_enabled: 2, - }, - excludeByName: { - Time: true, - job: true, - dest_server: true, - Value: true, - }, - } - ), - ]) + - tbStandardOptions.withOverrides([ - tbOverride.byName.new('name') + - tbOverride.byName.withPropertiesFromOptions( - tbStandardOptions.withLinks( - tbPanelOptions.link.withTitle('Go To Application') + - tbPanelOptions.link.withUrl( - $._config.argoCdUrl + '/applications/${__data.fields.Project}/${__value.raw}' - ) - ) - ), - tbOverride.byName.new('autosync_enabled') + - tbOverride.byName.withPropertiesFromOptions( - tbStandardOptions.color.withMode('fixed') + - tbStandardOptions.color.withFixedColor('yellow') + - tbCustom.withDisplayMode('color-background') - ), - ]), - - local appHealthStatusByAppQuery = ||| - sum( - argocd_app_info{ - %s - name=~"$application", - } - ) by (namespace, job, dest_server, project, name, health_status) - ||| % commonLabels, - - local appHealthStatusByAppTimeSeriesPanel = - timeSeriesPanel.new( - 'Application Health Status', - ) + - tsQueryOptions.withTargets( - prometheus.new( - '$datasource', - appHealthStatusByAppQuery, - ) + - prometheus.withLegendFormat( - '{{ dest_server }}/{{ project }}/{{ name }} - {{ health_status }}' - ) - ) + - tsQueryOptions.withInterval('5m') + - tsStandardOptions.withUnit('short') + - tsOptions.tooltip.withMode('multi') + - tsOptions.tooltip.withSort('desc') + - tsLegend.withShowLegend(true) + - tsLegend.withDisplayMode('table') + - tsLegend.withCalcs(['last']) + - tsLegend.withSortBy('Last') + - tsLegend.withSortDesc(true) + - tsCustom.withFillOpacity(10), - - local appSyncStatusByAppQuery = ||| - sum( - argocd_app_info{ - %s - name=~"$application", - } - ) by (namespace, job, dest_server, project, name, sync_status) - ||| % commonLabels, - - local appSyncStatusByAppTimeSeriesPanel = - timeSeriesPanel.new( - 'Application Sync Status', - ) + - tsQueryOptions.withTargets( - prometheus.new( - '$datasource', - appSyncStatusByAppQuery, - ) + - prometheus.withLegendFormat( - '{{ dest_server }}/{{ project }}/{{ name }} - {{ sync_status }}' - ) - ) + - tsQueryOptions.withInterval('5m') + - tsStandardOptions.withUnit('short') + - tsOptions.tooltip.withMode('multi') + - tsOptions.tooltip.withSort('desc') + - tsLegend.withShowLegend(true) + - tsLegend.withDisplayMode('table') + - tsLegend.withCalcs(['last']) + - tsLegend.withSortBy('Last') + - tsLegend.withSortDesc(true) + - tsCustom.withFillOpacity(10), - - local appSyncByAppQuery = ||| - sum( - round( - increase( - argocd_app_sync_total{ - %s - name=~"$application", - }[$__rate_interval] - ) - ) - ) by (namespace, job, dest_server, project, name, phase) - ||| % commonLabels, - - local appSyncByAppTimeSeriesPanel = - timeSeriesPanel.new( - 'Application Sync Result', - ) + - tsQueryOptions.withTargets( - prometheus.new( - '$datasource', - appSyncByAppQuery, - ) + - prometheus.withLegendFormat( - '{{ dest_server }}/{{ project }}/{{ name }} - {{ phase }}' - ) - ) + - tsQueryOptions.withInterval('5m') + - tsStandardOptions.withUnit('short') + - tsOptions.tooltip.withMode('multi') + - tsOptions.tooltip.withSort('desc') + - tsLegend.withShowLegend(true) + - tsLegend.withDisplayMode('table') + - tsLegend.withCalcs(['last']) + - tsLegend.withSortBy('Last') + - tsLegend.withSortDesc(true) + - tsCustom.withFillOpacity(10), - local summaryRow = row.new( 'Summary' @@ -1122,28 +826,38 @@ local tbOverride = tbStandardOptions.override; clustersCountStatPanel + tablePanel.gridPos.withX(0) + tablePanel.gridPos.withY(1) + - tablePanel.gridPos.withW(6) + + tablePanel.gridPos.withW(4) + tablePanel.gridPos.withH(4), repositoriesCountStatPanel + - tablePanel.gridPos.withX(6) + + tablePanel.gridPos.withX(4) + tablePanel.gridPos.withY(1) + - tablePanel.gridPos.withW(6) + + tablePanel.gridPos.withW(4) + tablePanel.gridPos.withH(4), appsCountStatPanel + + tablePanel.gridPos.withX(8) + + tablePanel.gridPos.withY(1) + + tablePanel.gridPos.withW(4) + + tablePanel.gridPos.withH(4), + healthStatusPieChartPanel + tablePanel.gridPos.withX(0) + tablePanel.gridPos.withY(5) + tablePanel.gridPos.withW(6) + - tablePanel.gridPos.withH(4), + tablePanel.gridPos.withH(6), + syncStatusPieChartPanel + + tablePanel.gridPos.withX(6) + + tablePanel.gridPos.withY(5) + + tablePanel.gridPos.withW(6) + + tablePanel.gridPos.withH(6), appsTablePanel + tablePanel.gridPos.withX(12) + tablePanel.gridPos.withY(1) + tablePanel.gridPos.withW(12) + - tablePanel.gridPos.withH(8), + tablePanel.gridPos.withH(10), ] + [ syncStatsRow + row.gridPos.withX(0) + - row.gridPos.withY(9) + + row.gridPos.withY(11) + row.gridPos.withW(24) + row.gridPos.withH(1), ] + @@ -1151,12 +865,12 @@ local tbOverride = tbStandardOptions.override; [syncActivityTimeSeriesPanel, syncFailuresTimeSeriesPanel], panelWidth=12, panelHeight=6, - startY=10 + startY=12 ) + [ controllerStatsRow + row.gridPos.withX(0) + - row.gridPos.withY(16) + + row.gridPos.withY(18) + row.gridPos.withW(24) + row.gridPos.withH(1), ] + @@ -1169,12 +883,12 @@ local tbOverride = tbStandardOptions.override; ], panelWidth=12, panelHeight=6, - startY=17 + startY=19 ) + [ clusterStatsRow + row.gridPos.withX(0) + - row.gridPos.withY(29) + + row.gridPos.withY(31) + row.gridPos.withW(24) + row.gridPos.withH(1), ] + @@ -1182,12 +896,12 @@ local tbOverride = tbStandardOptions.override; [resourceObjectsTimeSeriesPanel, apiResourcesTimeSeriesPanel, clusterEventsTimeSeriesPanel], panelWidth=8, panelHeight=6, - startY=30 + startY=32 ) + [ repoServerStatsRow + row.gridPos.withX(0) + - row.gridPos.withY(36) + + row.gridPos.withY(38) + row.gridPos.withW(24) + row.gridPos.withH(1), ] + @@ -1200,7 +914,7 @@ local tbOverride = tbStandardOptions.override; ], panelWidth=12, panelHeight=6, - startY=37 + startY=39 ) ) + if $._config.annotation.enabled then diff --git a/dashboards_out/argo-cd-application-overview.json b/dashboards_out/argo-cd-application-overview.json index 5d5cd40..dd0236c 100644 --- a/dashboards_out/argo-cd-application-overview.json +++ b/dashboards_out/argo-cd-application-overview.json @@ -5,6 +5,12 @@ "editable": true, "panels": [ { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, "id": 1, "title": "Summary by Cluster, Project", "type": "row" @@ -249,7 +255,6 @@ "type": "datasource", "uid": "-- Mixed --" }, - "desc": true, "fieldConfig": { "overrides": [ { @@ -299,7 +304,9 @@ "id": 8, "options": { "sortBy": [ - 2 + { + "displayName": "Application" + } ] }, "pluginVersion": "v10.1.0", @@ -332,7 +339,7 @@ }, "renameByName": { "dest_server": "Cluster", - "health_status": "Sync Status", + "health_status": "Health Status", "job": "Job", "name": "Application", "project": "Project" @@ -347,7 +354,6 @@ "type": "datasource", "uid": "-- Mixed --" }, - "desc": true, "fieldConfig": { "overrides": [ { @@ -397,7 +403,9 @@ "id": 9, "options": { "sortBy": [ - 2 + { + "displayName": "Application" + } ] }, "pluginVersion": "v10.1.0", @@ -445,7 +453,6 @@ "type": "datasource", "uid": "-- Mixed --" }, - "desc": true, "fieldConfig": { "overrides": [ { @@ -495,7 +502,9 @@ "id": 10, "options": { "sortBy": [ - 2 + { + "displayName": "Application" + } ] }, "pluginVersion": "v10.1.0", @@ -543,7 +552,6 @@ "type": "datasource", "uid": "-- Mixed --" }, - "desc": true, "fieldConfig": { "overrides": [ { @@ -593,7 +601,9 @@ "id": 11, "options": { "sortBy": [ - 2 + { + "displayName": "Application" + } ] }, "pluginVersion": "v10.1.0", diff --git a/dashboards_out/argo-cd-notifications-overview.json b/dashboards_out/argo-cd-notifications-overview.json index 3b1e250..0eb3686 100644 --- a/dashboards_out/argo-cd-notifications-overview.json +++ b/dashboards_out/argo-cd-notifications-overview.json @@ -5,6 +5,12 @@ "editable": true, "panels": [ { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, "id": 1, "title": "Summary", "type": "row" diff --git a/dashboards_out/argo-cd-operational-overview.json b/dashboards_out/argo-cd-operational-overview.json new file mode 100644 index 0000000..60b318b --- /dev/null +++ b/dashboards_out/argo-cd-operational-overview.json @@ -0,0 +1,1081 @@ +{ + "__inputs": [ ], + "__requires": [ ], + "description": "A dashboard that monitors ArgoCD with a focus on the operational. It is created using the [argo-cd-mixin](https://github.com/adinhodovic/argo-cd-mixin).", + "editable": true, + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "title": "Summary", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 2, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n argocd_cluster_info{\n namespace=~'$namespace',\n job=~'$job'\n }\n)\n" + } + ], + "title": "Clusters", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 3, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "count(\n count(\n argocd_app_info{\n namespace=~'$namespace',\n job=~'$job'\n }\n )\n by (repo)\n)\n" + } + ], + "title": "Repositories", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 4, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n argocd_app_info{\n namespace=~'$namespace',\njob=~'$job',\ndest_server=~'$cluster',\nproject=~'$project',\n\n }\n)\n" + } + ], + "title": "Applications", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Healthy" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Degraded" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Progressing" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 5 + }, + "id": 5, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "value" + ] + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n argocd_app_info{\n namespace=~'$namespace',\njob=~'$job',\ndest_server=~'$cluster',\nproject=~'$project',\n\n }\n) by (health_status)\n", + "instant": true, + "legendFormat": "{{ health_status }}" + } + ], + "title": "Health Status", + "type": "piechart" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Synced" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OutOfSync" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unknown" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 5 + }, + "id": 6, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "value" + ] + }, + "tooltip": { + "mode": "multi" + } + }, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n argocd_app_info{\n namespace=~'$namespace',\njob=~'$job',\ndest_server=~'$cluster',\nproject=~'$project',\n\n }\n) by (sync_status)\n", + "instant": true, + "legendFormat": "{{ sync_status }}" + } + ], + "title": "Sync Status", + "type": "piechart" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "name" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Go To Application", + "type": "dashboard", + "url": "/d/argo-cd-application-overview-kask/argocd-notifications-overview?&var-project=${__data.fields.Project}&var-application=${__value.raw}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 7, + "options": { + "sortBy": [ + { + "displayName": "Application" + } + ] + }, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n argocd_app_info{\n namespace=~'$namespace',\njob=~'$job',\ndest_server=~'$cluster',\nproject=~'$project',\n\n }\n) by (job, dest_server, project, name, health_status, sync_status)\n", + "format": "table", + "instant": true + } + ], + "title": "Applications", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "dest_server": true, + "job": true + }, + "indexByName": { + "health_status": 2, + "name": 0, + "project": 1, + "sync_status": 3 + }, + "renameByName": { + "dest_server": "Cluster", + "health_status": "Health Status", + "job": "Job", + "name": "Application", + "project": "Project", + "sync_status": "Sync Status" + } + } + } + ], + "type": "table" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 8, + "title": "Sync Stats", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n round(\n increase(\n argocd_app_sync_total{\n namespace=~'$namespace',\njob=~'$job',\ndest_server=~'$cluster',\nproject=~'$project',\n\n }[$__rate_interval]\n )\n )\n) by (job, dest_server, project, name)\n", + "legendFormat": "{{ dest_server }}/{{ project }}/{{ name }}" + } + ], + "title": "Sync Activity", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n round(\n increase(\n argocd_app_sync_total{\n namespace=~'$namespace',\njob=~'$job',\ndest_server=~'$cluster',\nproject=~'$project',\n\n phase=~\"Error|Failed\"\n }[$__rate_interval]\n )\n )\n) by (job, dest_server, project, application, phase)\n", + "legendFormat": "{{ dest_server }}/{{ project }}/{{ application }} - {{ phase }}" + } + ], + "title": "Sync Failures", + "type": "timeseries" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 11, + "title": "Controller Stats", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n round(\n increase(\n argocd_app_reconcile_count{\n namespace=~'$namespace',\n job=~'$job',\n dest_server=~'$cluster'\n }[$__rate_interval]\n )\n )\n) by (namespace, job, dest_server)\n", + "legendFormat": "{{ namespace }}/{{ dest_server }}" + } + ], + "title": "Recociliation Activity", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 13, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n increase(\n argocd_app_reconcile_bucket{\n namespace=~'$namespace',\n job=~'$job',\n dest_server=~'$cluster'\n }[$__rate_interval]\n )\n) by (le)\n", + "format": "heatmap", + "legendFormat": "{{ le }}" + } + ], + "title": "Reconciliation Performance", + "type": "heatmap" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n round(\n increase(\n argocd_app_k8s_request_total{\n namespace=~'$namespace',\n job=~'$job',\n project=~'$project'\n }[$__rate_interval]\n )\n )\n) by (job, server, project, verb, resource_kind)\n", + "legendFormat": "{{ server }}/{{ project }} - {{ verb }}/{{ resource_kind }}" + } + ], + "title": "K8s API Activity", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 26 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n argocd_kubectl_exec_pending{\n namespace=~'$namespace',\n job=~'$job'\n }\n) by (job, command)\n", + "legendFormat": "{{ dest_server }} - {{ command }}" + } + ], + "title": "Pending Kubectl Runs", + "type": "timeseries" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 16, + "title": "Cluster Stats", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 32 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n argocd_cluster_api_resource_objects{\n namespace=~'$namespace',\n job=~'$job',\n server=~'$cluster'\n }\n) by (namespace, job, server)\n", + "legendFormat": "{{ server }}" + } + ], + "title": "Resource Objects", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 32 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n argocd_cluster_api_resources{\n namespace=~'$namespace',\n job=~'$job',\n server=~'$cluster'\n }\n) by (namespace, job, server)\n", + "legendFormat": "{{ server }}" + } + ], + "title": "API Resources", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 32 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n increase(\n argocd_cluster_events_total{\n namespace=~'$namespace',\n job=~'$job',\n server=~'$cluster'\n }[$__rate_interval]\n )\n) by (namespace, job, server)\n", + "legendFormat": "{{ server }}" + } + ], + "title": "Cluster Events", + "type": "timeseries" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 20, + "title": "Repo Server Stats", + "type": "row" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 39 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n increase(\n argocd_git_request_total{\n namespace=~'$namespace',\n job=~'$job',\n request_type=\"ls-remote\"\n }[$__rate_interval]\n )\n) by (namespace, job, repo)\n", + "legendFormat": "{{ namespace }} - {{ repo }}" + } + ], + "title": "Git Requests (ls-remote)", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 39 + }, + "id": 22, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n increase(\n argocd_git_request_total{\n namespace=~'$namespace',\n job=~'$job',\n request_type=\"fetch\"\n }[$__rate_interval]\n )\n) by (namespace, job, repo)\n", + "legendFormat": "{{ namespace }} - {{ repo }}" + } + ], + "title": "Git Requests (checkout)", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 46 + }, + "id": 23, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n increase(\n argocd_git_request_duration_seconds_bucket{\n namespace=~'$namespace',\n job=~'$job',\n request_type=\"fetch\"\n }[$__rate_interval]\n )\n) by (le)\n", + "format": "heatmap", + "legendFormat": "{{ le }}" + } + ], + "title": "Git Fetch Performance", + "type": "heatmap" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 46 + }, + "id": 24, + "pluginVersion": "v10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "expr": "sum(\n increase(\n argocd_git_request_duration_seconds_bucket{\n namespace=~'$namespace',\n job=~'$job',\n request_type=\"ls-remote\"\n }[$__rate_interval]\n )\n) by (le)\n", + "format": "heatmap", + "legendFormat": "{{ le }}" + } + ], + "title": "Git Ls-remote Performance", + "type": "heatmap" + } + ], + "schemaVersion": 36, + "tags": [ + "ci/cd", + "argo-cd" + ], + "templating": { + "list": [ + { + "label": "Data Source", + "name": "datasource", + "query": "prometheus", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Namespace", + "multi": true, + "name": "namespace", + "query": "label_values(argocd_app_info{}, namespace)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Job", + "multi": true, + "name": "job", + "query": "label_values(job)", + "refresh": 2, + "regex": "argo.*", + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Cluster", + "multi": true, + "name": "cluster", + "query": "label_values(argocd_app_info{namespace=~\"$namespace\", job=~\"$job\"}, dest_server)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "includeAll": true, + "label": "Project", + "multi": true, + "name": "project", + "query": "label_values(argocd_app_info{namespace=~\"$namespace\", job=~\"$job\", dest_server=~\"$cluster\"}, project)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "utc", + "title": "ArgoCD / Operational / Overview", + "uid": "argo-cd-operational-overview-kask" +}