From c7f43840deb95a7737c785db82d1543253e669bd Mon Sep 17 00:00:00 2001 From: Muzammil Date: Tue, 8 Oct 2024 16:42:47 +0500 Subject: [PATCH] CTO-441 Enable ceph bucket monitoring (#362) --- .../grafana-dashboards/ceph-objectstore.json | 1108 +++++++++++++++++ .../database-metrics-architecture.svg | 4 + docs/monitoring/database-metrics.md | 11 + .../dashboards-rook-ceph.yaml | 15 + .../base/utils-post-config/kustomization.yaml | 1 + .../rook-ceph-objectstore-exporter.yaml | 113 ++ .../base/monitoring-post-config.yaml | 3 +- gitops/argo-apps/base/monitoring-pre.yaml | 3 +- gitops/argo-apps/base/monitoring.yaml | 3 +- gitops/argo-apps/base/utils-post-config.yaml | 3 + .../env-onboard-config/netbird.tf | 5 + 11 files changed, 1263 insertions(+), 6 deletions(-) create mode 100644 assets/grafana-dashboards/ceph-objectstore.json create mode 100644 docs/monitoring/database-metrics-architecture.svg create mode 100644 docs/monitoring/database-metrics.md create mode 100644 gitops/applications/base/utils-post-config/rook-ceph-objectstore-exporter.yaml diff --git a/assets/grafana-dashboards/ceph-objectstore.json b/assets/grafana-dashboards/ceph-objectstore.json new file mode 100644 index 000000000..09e0dd894 --- /dev/null +++ b/assets/grafana-dashboards/ceph-objectstore.json @@ -0,0 +1,1108 @@ +{ + "__inputs": [ + { + "name": "DS_MIMIR", + "label": "Mimir", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "11.1.1" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + 
}, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 11, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Users whose stored data exceeds 80% of their quota. NOTE: bucket quotas are enforced through user quotas", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.8 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 12, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "11.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_MIMIR}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(radosgw_usage_user_total_bytes{cluster=~\"$cluster\",user=~\"$user\"}) by (user) / \nsum(radosgw_usage_user_quota_size_bytes{cluster=~\"$cluster\",user=~\"$user\"} ) by (user) > 0.8 ", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "B" + } + ], + "title": "Users approaching max quota limit", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "user", + "Value" + ] + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "gridPos": { 
+ "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 6, + "panels": [], + "title": "User metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Total size of data owned by the user across all buckets", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_MIMIR}" + }, + "editorMode": "code", + "expr": "sum(radosgw_usage_user_total_bytes{cluster=~\"$cluster\",user=~\"$user\"}) by (user)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "User data Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Maximum data storage capacity for the given user", + "fieldConfig": { + "defaults": { + 
"color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 8 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_MIMIR}" + }, + "editorMode": "code", + "expr": "sum(radosgw_usage_user_quota_size_bytes{cluster=~\"$cluster\",user=~\"$user\"}) by (user)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "User data quota", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Percentage of storage quota used by the user", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": 
false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed+area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.8 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_MIMIR}" + }, + "editorMode": "code", + "expr": "sum(radosgw_usage_user_total_bytes{cluster=~\"$cluster\",user=~\"$user\"}) by (user) / \nsum(radosgw_usage_user_quota_size_bytes{cluster=~\"$cluster\",user=~\"$user\"} ) by (user)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "User data quota utilization (%)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 5, + "panels": [], + "title": "Bucket metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Total size of the bucket", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": 
false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 17 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_MIMIR}" + }, + "editorMode": "code", + "expr": "sum(radosgw_usage_bucket_bytes{cluster=~\"$cluster\",bucket=~\"$bucket\"}) by (bucket)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Bucket Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Max. size the bucket is allowed to grow. 
0 indicates unlimited", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 17 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_MIMIR}" + }, + "editorMode": "code", + "expr": "sum(radosgw_usage_bucket_quota_size_bytes{cluster=~\"$cluster\",bucket=~\"$bucket\"} ) by (bucket)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Bucket size quota", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Percentage of bucket storage quota utilization", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed+area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.8 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 17 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_MIMIR}" + }, + "editorMode": "code", + "expr": "\n(sum(radosgw_usage_bucket_bytes{cluster=~\"$cluster\",bucket=~\"$bucket\"}) by (bucket) / \nsum(radosgw_usage_bucket_quota_size_bytes{cluster=~\"$cluster\",bucket=~\"$bucket\"} + 1) by (bucket) ) <=1 ", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Bucket quota utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "No. 
of objects in the bucket", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "sishort" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_MIMIR}" + }, + "editorMode": "code", + "expr": "sum(radosgw_usage_bucket_objects{cluster=~\"$cluster\",bucket=~\"$bucket\"}) by (bucket)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Bucket object count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Max No. of objects allowed in the bucket. 
-1 indicates \"no limit\"", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "sishort" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_MIMIR}" + }, + "editorMode": "code", + "expr": "sum(radosgw_usage_bucket_quota_size_objects{cluster=~\"$cluster\",bucket=~\"$bucket\"}) by (bucket)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Bucket object quota", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "definition": "label_values(cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(radosgw_usage_bucket_bytes{cluster=~\"$cluster\"},bucket)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "bucket", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(radosgw_usage_bucket_bytes{cluster=~\"$cluster\"},bucket)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(radosgw_usage_user_total_bytes{cluster=~\"$cluster\"},user)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "user", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(radosgw_usage_user_total_bytes{cluster=~\"$cluster\"},user)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Ceph - Object storage", + "uid": "fdzqm640fnpj4b-v002", + "version": 6, + "weekStart": "" + } \ No newline at end of file diff --git a/docs/monitoring/database-metrics-architecture.svg b/docs/monitoring/database-metrics-architecture.svg new file mode 100644 index 000000000..a28e44d75 --- /dev/null +++ b/docs/monitoring/database-metrics-architecture.svg @@ -0,0 +1,4 @@ + + + +
db container
db container
metrics container
metrics container
database pod
database pod
prometheus
prometheus
prometheus pod
prometheus pod
scrapes
metrics
scrapes...
grafana
grafana
grafana pod
grafana pod
mojaloop 
k8s cluster
mojaloop...
self managed database 
self managed database 
metrics container
metrics container
exporter pod
exporter pod
prometheus
prometheus
prometheus pod
prometheus pod
scrapes
metrics
scrapes...
grafana
grafana
grafana pod
grafana pod
mojaloop 
k8s cluster
mojaloop...
cloud managed database 
cloud managed database 
managed db 
service 
managed db...
pulls 
metrics data
pulls...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/monitoring/database-metrics.md b/docs/monitoring/database-metrics.md new file mode 100644 index 000000000..dcc0a1805 --- /dev/null +++ b/docs/monitoring/database-metrics.md @@ -0,0 +1,11 @@ +# Context +The database (e.g. mysql/mongo) may run inside the mojaloop cluster or may run as a managed database (e.g. AWS RDS). In both cases, we want to gather database metrics for operational visibility. + +# Problem +How do we show the same metrics to the ops team when the database is running as a managed instance (e.g. AWS RDS)? + +# Solution + +In case of a self managed database, the exporter runs as a sidecar container with the database container in the same k8s pod. When the database runs as an external managed service (e.g. AWS RDS), we deploy a standalone exporter instance. This exporter instance pulls the metrics data from the database and converts them to prometheus format. + +![diagram](./database-metrics-architecture.svg) \ No newline at end of file diff --git a/gitops/applications/base/monitoring-post-config/dashboards-rook-ceph.yaml b/gitops/applications/base/monitoring-post-config/dashboards-rook-ceph.yaml index 5e4d168ef..24b3b2fd2 100644 --- a/gitops/applications/base/monitoring-post-config/dashboards-rook-ceph.yaml +++ b/gitops/applications/base/monitoring-post-config/dashboards-rook-ceph.yaml @@ -54,4 +54,19 @@ spec: grafanaCom: id: 5342 revision: 9 +--- +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: ceph-objectstore +spec: + folder: storage + instanceSelector: + matchLabels: + dashboards: "grafana" + datasources: + - inputName: "DS_MIMIR" + datasourceName: "${ARGOCD_ENV_dashboard_datasource_name}" + url: "https://raw.githubusercontent.com/mojaloop/iac-modules/${ARGOCD_ENV_monitoring_application_gitrepo_tag}/assets/grafana-dashboards/ceph-objectstore.json" + --- \ No newline at end of file diff --git a/gitops/applications/base/utils-post-config/kustomization.yaml 
b/gitops/applications/base/utils-post-config/kustomization.yaml index 5273f5bac..cfbf7edd9 100644 --- a/gitops/applications/base/utils-post-config/kustomization.yaml +++ b/gitops/applications/base/utils-post-config/kustomization.yaml @@ -3,6 +3,7 @@ kind: Kustomization resources: - rbac.yaml - crossplane-providers.yaml + - rook-ceph-objectstore-exporter.yaml secretGenerator: - name: terraformrc diff --git a/gitops/applications/base/utils-post-config/rook-ceph-objectstore-exporter.yaml b/gitops/applications/base/utils-post-config/rook-ceph-objectstore-exporter.yaml new file mode 100644 index 000000000..291aec4ee --- /dev/null +++ b/gitops/applications/base/utils-post-config/rook-ceph-objectstore-exporter.yaml @@ -0,0 +1,113 @@ +apiVersion: ceph.rook.io/v1 +kind: CephObjectStoreUser +metadata: + name: ceph-objectstore-exporter-user + namespace: ${ARGOCD_ENV_rook_ceph_namespace} +spec: + store: ceph-objectstore + displayName: ceph-objectstore-exporter-user + clusterNamespace: ${ARGOCD_ENV_rook_ceph_namespace} + capabilities: + bucket: read + metadata: read + usage: read + user: read + + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ceph-objectstore-exporter + namespace: ${ARGOCD_ENV_rook_ceph_namespace} + labels: + app.kubernetes.io/name: ceph-objectstore-exporter +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: ceph-objectstore-exporter + template: + metadata: + labels: + app.kubernetes.io/name: ceph-objectstore-exporter + spec: + containers: + - image: ghcr.io/pando85/radosgw_usage_exporter:latest + env: + - name: ACCESS_KEY + valueFrom: + secretKeyRef: + key: AccessKey + name: rook-ceph-object-user-ceph-objectstore-ceph-objectstore-exporter-user + - name: SECRET_KEY + valueFrom: + secretKeyRef: + key: SecretKey + name: rook-ceph-object-user-ceph-objectstore-ceph-objectstore-exporter-user + - name: RADOSGW_SERVER + valueFrom: + secretKeyRef: + key: Endpoint + name: 
rook-ceph-object-user-ceph-objectstore-ceph-objectstore-exporter-user + args: + - --insecure + name: exporter + ports: + - containerPort: 9242 + name: http + protocol: TCP + resources: + limits: + cpu: 400m + memory: 512Mi + requests: + cpu: 100m + memory: 64Mi + livenessProbe: + tcpSocket: + port: http + readinessProbe: + tcpSocket: + port: http + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + securityContext: + runAsNonRoot: true + runAsUser: 1000 +--- +apiVersion: v1 +kind: Service +metadata: + name: ceph-objectstore-exporter + namespace: ${ARGOCD_ENV_rook_ceph_namespace} + labels: + app.kubernetes.io/name: ceph-objectstore-exporter +spec: + selector: + app.kubernetes.io/name: ceph-objectstore-exporter + ports: + - name: http + port: 9242 + protocol: TCP + targetPort: http +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: ceph-objectstore-exporter + namespace: ${ARGOCD_ENV_rook_ceph_namespace} + labels: + app.kubernetes.io/name: ceph-objectstore-exporter +spec: + selector: + matchLabels: + app.kubernetes.io/name: ceph-objectstore-exporter + endpoints: + - interval: 1m + port: http + diff --git a/gitops/argo-apps/base/monitoring-post-config.yaml b/gitops/argo-apps/base/monitoring-post-config.yaml index b426fcda2..572c6f723 100644 --- a/gitops/argo-apps/base/monitoring-post-config.yaml +++ b/gitops/argo-apps/base/monitoring-post-config.yaml @@ -31,8 +31,7 @@ spec: source: repoURL: ${ARGOCD_ENV_argocd_repo_url} - # targetRevision: ${ARGOCD_ENV_monitoring_application_gitrepo_tag} - targetRevision: feature/ccv2-common-branch + targetRevision: ${ARGOCD_ENV_monitoring_application_gitrepo_tag} path: gitops/applications/base/monitoring-post-config plugin: name: envsubst diff --git a/gitops/argo-apps/base/monitoring-pre.yaml b/gitops/argo-apps/base/monitoring-pre.yaml index 6ec85be54..d6c975225 100644 --- a/gitops/argo-apps/base/monitoring-pre.yaml +++ 
b/gitops/argo-apps/base/monitoring-pre.yaml @@ -31,8 +31,7 @@ spec: source: repoURL: ${ARGOCD_ENV_argocd_repo_url} - # targetRevision: ${ARGOCD_ENV_monitoring_application_gitrepo_tag} - targetRevision: feature/ccv2-common-branch + targetRevision: ${ARGOCD_ENV_monitoring_application_gitrepo_tag} path: gitops/applications/base/monitoring-pre plugin: name: envsubst diff --git a/gitops/argo-apps/base/monitoring.yaml b/gitops/argo-apps/base/monitoring.yaml index 25efb0fec..2878301e4 100644 --- a/gitops/argo-apps/base/monitoring.yaml +++ b/gitops/argo-apps/base/monitoring.yaml @@ -57,8 +57,7 @@ spec: source: repoURL: ${ARGOCD_ENV_argocd_repo_url} - # targetRevision: ${ARGOCD_ENV_monitoring_application_gitrepo_tag} - targetRevision: feature/ccv2-common-branch + targetRevision: ${ARGOCD_ENV_monitoring_application_gitrepo_tag} path: gitops/applications/base/monitoring plugin: name: envsubst diff --git a/gitops/argo-apps/base/utils-post-config.yaml b/gitops/argo-apps/base/utils-post-config.yaml index d7b5a272d..490c6a09d 100644 --- a/gitops/argo-apps/base/utils-post-config.yaml +++ b/gitops/argo-apps/base/utils-post-config.yaml @@ -47,3 +47,6 @@ spec: - name: "crossplane_namespace" value: "${ARGOCD_ENV_utils_crossplane_namespace}" + + - name: "rook_ceph_namespace" + value: "${ARGOCD_ENV_utils_rook_ceph_namespace}" diff --git a/terraform/config-params/ccnew-config/env-onboard-config/netbird.tf b/terraform/config-params/ccnew-config/env-onboard-config/netbird.tf index 0e62cd553..4d3443fd8 100644 --- a/terraform/config-params/ccnew-config/env-onboard-config/netbird.tf +++ b/terraform/config-params/ccnew-config/env-onboard-config/netbird.tf @@ -44,6 +44,11 @@ resource "netbird_group" "env_backtunnel" { create_before_destroy = true } } + +resource "netbird_group" "env_backtunnel" { + count = var.k8s_cluster_type == "eks" ? 
1 : 0 + name = "${var.env_name}-backtunnel" +} #route to allow private traffic into en k8s network from cc user group and the env_users group, env gw is the gateway peer resource "netbird_route" "env_k8s" { description = "${var.env_name}-k8s"