diff --git a/alerts/google-cloud-chronicle/README.md b/alerts/google-cloud-chronicle/README.md new file mode 100644 index 0000000000..c47f6f1bf8 --- /dev/null +++ b/alerts/google-cloud-chronicle/README.md @@ -0,0 +1,18 @@ +# Alerts for Chronicle + +### Silent Forwarder + +This alert policy detects the absence of data for a Chronicle collector with collector_id = 10479925-878c-11e7-9421-10604b7cb5c1 over a 1-hour window. Alerts from this policy generally require further investigation and indicate an issue with the Chronicle collector. + +### All silent Chronicle forwarder and logtype combinations + +This alert policy fires an alert every time a Chronicle forwarder goes silent for a log type. For example, if 4 forwarders are set up supplying 5 log types each, there would be 20 alerts firing (one for each combination). Similarly, if a single Chronicle forwarder goes down, 5 alerts will be active. + +### All silent Chronicle forwarder and logtype combinations except few logtypes + +This alert policy is similar to the above alert policy, except that it will not fire alerts for the excluded log types. In the context of this template, it won't fire alerts if Chronicle forwarders stop sending logs for BIND_DNS, CS_DETECTS or BRO_DNS. + + +### Forwarder buffer usage threshold + +This alert policy sends out alerts when any Chronicle forwarder collecting logs from pcap has mean memory buffer usage above 1% over a 1-hour time window. 
diff --git a/alerts/google-cloud-chronicle/all-silent-forwarder-logtype-combinations-except-few-logtypes.v1.json b/alerts/google-cloud-chronicle/all-silent-forwarder-logtype-combinations-except-few-logtypes.v1.json new file mode 100644 index 0000000000..42eae030a8 --- /dev/null +++ b/alerts/google-cloud-chronicle/all-silent-forwarder-logtype-combinations-except-few-logtypes.v1.json @@ -0,0 +1,28 @@ +{ + "displayName": "sample policy to detect all silent Chronicle forwarder and logtype combinations except few logtypes", + "conditions": [ + { + "displayName": "chronicle forwarder and logtypes silent for 1 hour except few", + "conditionAbsent": { + "aggregations": [ + { + "alignmentPeriod": "3600s", + "crossSeriesReducer": "REDUCE_MEAN", + "groupByFields": [ + "resource.label.collector_id", + "resource.label.log_type" + ], + "perSeriesAligner": "ALIGN_DELTA" + } + ], + "duration": "3600s", + "filter": "resource.type = \"chronicle.googleapis.com/Collector\" AND resource.labels.log_type != one_of(\"BIND_DNS\", \"BRO_DNS\", \"CS_DETECTS\") AND metric.type = \"chronicle.googleapis.com/ingestion/log/record_count\"", + "trigger": { + "count": 1 + } + } + } + ], + "combiner": "OR", + "enabled": true +} \ No newline at end of file diff --git a/alerts/google-cloud-chronicle/all-silent-forwarder-logtype-combinations.v1.json b/alerts/google-cloud-chronicle/all-silent-forwarder-logtype-combinations.v1.json new file mode 100644 index 0000000000..bd481207bd --- /dev/null +++ b/alerts/google-cloud-chronicle/all-silent-forwarder-logtype-combinations.v1.json @@ -0,0 +1,28 @@ +{ + "displayName": "sample policy to detect all silent Chronicle forwarder and logtype combinations", + "conditions": [ + { + "displayName": "chronicle forwarder and logtypes silent for 1 hour", + "conditionAbsent": { + "aggregations": [ + { + "alignmentPeriod": "3600s", + "crossSeriesReducer": "REDUCE_MEAN", + "groupByFields": [ + "resource.label.collector_id", + "resource.label.log_type" + ], + 
"perSeriesAligner": "ALIGN_DELTA" + } + ], + "duration": "3600s", + "filter": "resource.type = \"chronicle.googleapis.com/Collector\" AND metric.type = \"chronicle.googleapis.com/ingestion/log/record_count\"", + "trigger": { + "count": 1 + } + } + } + ], + "combiner": "OR", + "enabled": true +} \ No newline at end of file diff --git a/alerts/google-cloud-chronicle/forwarder-buffer-usage-more-than-threshold-with-filters.v1.json b/alerts/google-cloud-chronicle/forwarder-buffer-usage-more-than-threshold-with-filters.v1.json new file mode 100644 index 0000000000..820b2f6cfb --- /dev/null +++ b/alerts/google-cloud-chronicle/forwarder-buffer-usage-more-than-threshold-with-filters.v1.json @@ -0,0 +1,29 @@ +{ + "displayName": "sample policy to detect forwarder mean buffer used is more than 1% over a 1 hour window for input type pcap and buffer type memory", + "conditions": [ + { + "displayName": "forwarder mean buffer used is more than 1% over 1 hour window", + "conditionThreshold": { + "aggregations": [ + { + "alignmentPeriod": "3600s", + "crossSeriesReducer": "REDUCE_MEAN", + "groupByFields": [ + "resource.label.project_id" + ], + "perSeriesAligner": "ALIGN_MEAN" + } + ], + "comparison": "COMPARISON_GT", + "duration": "0s", + "filter": "resource.type = \"chronicle.googleapis.com/Collector\" AND metric.type = \"chronicle.googleapis.com/forwarder/buffer_used\" AND (metric.labels.input_type = \"pcap\" AND metric.labels.buffer_type = \"memory\")", + "thresholdValue": 0.01, + "trigger": { + "count": 1 + } + } + } + ], + "combiner": "OR", + "enabled": true +} \ No newline at end of file diff --git a/alerts/google-cloud-chronicle/metadata.yaml b/alerts/google-cloud-chronicle/metadata.yaml new file mode 100644 index 0000000000..2b019aa752 --- /dev/null +++ b/alerts/google-cloud-chronicle/metadata.yaml @@ -0,0 +1,29 @@ +alert_policy_templates: +- + id: silent-forwarder + description: "sample policy to detect a single silent Chronicle forwarder using collector_id filter" + 
version: 1 + related_integrations: + - id: chronicle_security + platform: GCP +- + id: forwarder-buffer-usage-more-than-threshold-with-filters + description: "sample policy to detect forwarder mean buffer used is more than 1% over a 1 hour window for input type pcap and buffer type memory" + version: 1 + related_integrations: + - id: chronicle_security + platform: GCP +- + id: all-silent-forwarder-logtype-combinations-except-few-logtypes + description: "sample policy to detect all silent Chronicle forwarder and logtype combinations except few logtypes" + version: 1 + related_integrations: + - id: chronicle_security + platform: GCP +- + id: all-silent-forwarder-logtype-combinations + description: "sample policy to detect all silent Chronicle forwarder and logtype combinations" + version: 1 + related_integrations: + - id: chronicle_security + platform: GCP \ No newline at end of file diff --git a/alerts/google-cloud-chronicle/silent-forwarder.v1.json b/alerts/google-cloud-chronicle/silent-forwarder.v1.json new file mode 100644 index 0000000000..986b37df2e --- /dev/null +++ b/alerts/google-cloud-chronicle/silent-forwarder.v1.json @@ -0,0 +1,27 @@ +{ + "displayName": "sample policy to detect a single silent Chronicle forwarder using collector_id filter", + "conditions": [ + { + "displayName": "chronicle forwarder silent for 1 hour", + "conditionAbsent": { + "aggregations": [ + { + "alignmentPeriod": "3600s", + "crossSeriesReducer": "REDUCE_MEAN", + "groupByFields": [ + "resource.label.project_id" + ], + "perSeriesAligner": "ALIGN_DELTA" + } + ], + "duration": "3600s", + "filter": "resource.type = \"chronicle.googleapis.com/Collector\" AND resource.labels.collector_id = \"10479925-878c-11e7-9421-10604b7cb5c1\" AND metric.type = \"chronicle.googleapis.com/ingestion/log/record_count\"", + "trigger": { + "count": 1 + } + } + } + ], + "combiner": "OR", + "enabled": true +} \ No newline at end of file diff --git 
a/alerts/google-cloud-redis/redis-engine-cpu-utilization-high.v1.json b/alerts/google-cloud-redis/redis-engine-cpu-utilization-high.v1.json index c58d34a0d8..7ad19a9f2e 100644 --- a/alerts/google-cloud-redis/redis-engine-cpu-utilization-high.v1.json +++ b/alerts/google-cloud-redis/redis-engine-cpu-utilization-high.v1.json @@ -7,7 +7,9 @@ "userLabels": { "context": "${CONTEXT}", "resource_type": "${RESOURCE_TYPE}", - "instance_id": "${INSTANCE_NAME}" + "project_id": "${PROJECT_ID}", + "region": "${REGION}", + "instance_id": "${INSTANCE_ID}" }, "conditions": [ { diff --git a/alerts/google-cloud-redis/standard-instance-failover.v1.json b/alerts/google-cloud-redis/standard-instance-failover.v1.json index 46f45b3d51..7822cc1881 100644 --- a/alerts/google-cloud-redis/standard-instance-failover.v1.json +++ b/alerts/google-cloud-redis/standard-instance-failover.v1.json @@ -7,7 +7,9 @@ "userLabels": { "context": "${CONTEXT}", "resource_type": "${RESOURCE_TYPE}", - "instance_id": "${INSTANCE_NAME}" + "project_id": "${PROJECT_ID}", + "region": "${REGION}", + "instance_id": "${INSTANCE_ID}" }, "conditions": [ { diff --git a/alerts/google-cloud-redis/system-memory-usage-ratio-high.v1.json b/alerts/google-cloud-redis/system-memory-usage-ratio-high.v1.json index f254d3045f..6516787d39 100644 --- a/alerts/google-cloud-redis/system-memory-usage-ratio-high.v1.json +++ b/alerts/google-cloud-redis/system-memory-usage-ratio-high.v1.json @@ -7,7 +7,9 @@ "userLabels": { "context": "${CONTEXT}", "resource_type": "${RESOURCE_TYPE}", - "instance_id": "${INSTANCE_NAME}" + "project_id": "${PROJECT_ID}", + "region": "${REGION}", + "instance_id": "${INSTANCE_ID}" }, "conditions": [ { diff --git a/dashboards/nginx-ingress/README.md b/dashboards/ingress-nginx/README.md similarity index 100% rename from dashboards/nginx-ingress/README.md rename to dashboards/ingress-nginx/README.md diff --git a/dashboards/nginx-ingress/metadata.yaml b/dashboards/ingress-nginx/metadata.yaml similarity index 87% 
rename from dashboards/nginx-ingress/metadata.yaml rename to dashboards/ingress-nginx/metadata.yaml index 50a56fcb97..d69b7096a0 100644 --- a/dashboards/nginx-ingress/metadata.yaml +++ b/dashboards/ingress-nginx/metadata.yaml @@ -2,7 +2,7 @@ sample_dashboards: - category: Nginx id: nginx-ingress-prometheus - display_name: Nginx Ingress + display_name: Nginx Ingress Prometheus Overview description: |- This dashboard has charts displaying: Controller Request Volume, Controller Connections, Config Reloads, Ingress Request Rate, Ingress Request Volume, Network I/O Pressure, Memory Usage, and CPU Usage related_integrations: diff --git a/dashboards/nginx-ingress/nginx-ingress-prometheus.json b/dashboards/ingress-nginx/nginx-ingress-prometheus.json similarity index 100% rename from dashboards/nginx-ingress/nginx-ingress-prometheus.json rename to dashboards/ingress-nginx/nginx-ingress-prometheus.json diff --git a/dashboards/nginx-ingress/nginx-ingress-prometheus.png b/dashboards/ingress-nginx/nginx-ingress-prometheus.png similarity index 100% rename from dashboards/nginx-ingress/nginx-ingress-prometheus.png rename to dashboards/ingress-nginx/nginx-ingress-prometheus.png diff --git a/dashboards/nvidia-gpu/metadata.yaml b/dashboards/nvidia-gpu/metadata.yaml index 58d9a82a95..b547f7a0b6 100644 --- a/dashboards/nvidia-gpu/metadata.yaml +++ b/dashboards/nvidia-gpu/metadata.yaml @@ -5,11 +5,6 @@ sample_dashboards: display_name: NVIDIA GPU Monitoring Overview (GCE & GKE) description: |- Displays GPU metrics for both GKE Nodes and GCE VMs. GPU metrics for the GCE VMs require the Ops Agent to be installed. 
- related_integrations: - - id: nvml - platform: GCE - - id: dcgm - platform: GCE - category: NVIDIA GPUs id: nvidia-dcgm diff --git a/dashboards/nvidia-gpu/nvidia-dcgm.json b/dashboards/nvidia-gpu/nvidia-dcgm.json index cf7fad856b..8ffdb251bb 100644 --- a/dashboards/nvidia-gpu/nvidia-dcgm.json +++ b/dashboards/nvidia-gpu/nvidia-dcgm.json @@ -18,7 +18,7 @@ "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "timeSeriesQueryLanguage": "fetch gce_instance\n| metric 'workload.googleapis.com/dcgm.gpu.pipe_utilization'\n" + "timeSeriesQueryLanguage": "fetch gce_instance\n| metric 'workload.googleapis.com/dcgm.gpu.profiling.pipe_utilization'\n" } } ], @@ -46,7 +46,7 @@ "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "timeSeriesQueryLanguage": "fetch gce_instance\n| metric 'workload.googleapis.com/dcgm.gpu.pcie_traffic_rate'\n| cast_units(\"By/s\")" + "timeSeriesQueryLanguage": "fetch gce_instance\n| metric 'workload.googleapis.com/dcgm.gpu.profiling.pcie_traffic_rate'\n| cast_units(\"By/s\")" } } ], @@ -74,7 +74,7 @@ "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "timeSeriesQueryLanguage": "fetch gce_instance\n| metric 'workload.googleapis.com/dcgm.gpu.sm_utilization'\n" + "timeSeriesQueryLanguage": "fetch gce_instance\n| metric 'workload.googleapis.com/dcgm.gpu.profiling.sm_utilization'\n" } } ], @@ -122,7 +122,7 @@ "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "timeSeriesQueryLanguage": "fetch gce_instance\n| metric 'workload.googleapis.com/dcgm.gpu.sm_occupancy'\n" + "timeSeriesQueryLanguage": "fetch gce_instance\n| metric 'workload.googleapis.com/dcgm.gpu.profiling.sm_occupancy'\n" } } ], @@ -150,7 +150,7 @@ "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "timeSeriesQueryLanguage": "fetch gce_instance\n| metric 'workload.googleapis.com/dcgm.gpu.nvlink_traffic_rate'\n| cast_units(\"By/s\")" + "timeSeriesQueryLanguage": "fetch gce_instance\n| metric 
'workload.googleapis.com/dcgm.gpu.profiling.nvlink_traffic_rate'\n| cast_units(\"By/s\")" } } ], diff --git a/integrations/airflow/documentation.yaml b/integrations/airflow/documentation.yaml index bf11c50fa9..6ae16d27f4 100644 --- a/integrations/airflow/documentation.yaml +++ b/integrations/airflow/documentation.yaml @@ -3,20 +3,22 @@ app_name_short: Airflow app_name: Apache {{app_name_short}} app_site_name: Airflow app_site_url: https://airflow.apache.org/ -exporter_name: the Airflow exporter -exporter_pkg_name: airflow +exporter_name: StatsD +exporter_pkg_name: statsd exporter_repo_url: https://airflow.apache.org/docs/apache-airflow/stable/logging-monitoring/metrics.html additional_prereq_info: | - {{exporter_name}} exposes Prometheus-format metrics automatically; you do not have to - install it separately. To verify that {{exporter_name}} is emitting metrics on the expected - endpoints, set up port-forwarding with the following command: + The official {{app_name_short}} [Helm chart](https://airflow.apache.org/docs/helm-chart/){:class=external} + includes a {{exporter_name}} deployment that exposes Prometheus-format metrics automatically. + To verify that {{exporter_name}} is emitting metrics on the expected endpoints, do the following: + + 0. Set up port forwarding by using the following command:
- kubectl -n {{namespace_name}} port-forward deploy/airflow-statsd 9102 + kubectl -n {{namespace_name}} port-forward deploy/AIRFLOW_RELEASE_NAME-statsd 9102- Access the endpoint `localhost:9102/metrics` by using the browser or curl in another terminal session - to verify that the metrics are being exposed by the exporter for scraping. + 0. Access the endpoint `localhost:9102/metrics` by using the browser + or the `curl` utility in another terminal session. dashboard_available: true multiple_dashboards: false dashboard_display_name: {{app_name_short}} Prometheus Overview @@ -40,9 +42,9 @@ podmonitoring_config: | component: statsd release: airflow additional_podmonitoring_info: | - Ensure that the values of the `port` and `matchLabels` fields match those of the {{app_name_short}} pods you want to monitor. - The labels and values shown here are set by default when Airflow is deployed with - [Helm](https://airflow.apache.org/docs/helm-chart/){:class=external}. + Ensure that the values of the `port` and `matchLabels` fields match those of the {{exporter_name}} pods you want to monitor. + The labels and values shown here are set by default when {{app_name_short}} is + deployed with [Helm](https://airflow.apache.org/docs/helm-chart/){:class=external}. 
sample_promql_query: up{job="airflow", cluster="{{cluster_name}}", namespace="{{namespace_name}}"} alerts_config: | apiVersion: monitoring.googleapis.com/v1 diff --git a/integrations/cassandra/ops_agent_metadata.yaml b/integrations/cassandra/ops_agent_metadata.yaml index 0889f24ad7..dfb9d6894c 100644 --- a/integrations/cassandra/ops_agent_metadata.yaml +++ b/integrations/cassandra/ops_agent_metadata.yaml @@ -71,6 +71,15 @@ platforms: - name: jsonPayload.timeStopping type: string description: Seconds the JVM took to stop threads before garbage collection + - name: jsonPayload.level + type: string + description: Level of the log entry + - name: jsonPayload.pid + type: string + description: Process ID of the Java process logging the message + - name: jsonPayload.tid + type: string + description: Thread ID of the Java process logging the message - name: severity type: string description: '' diff --git a/integrations/elasticsearch/ops_agent_metadata.yaml b/integrations/elasticsearch/ops_agent_metadata.yaml index 3dcebcaea3..8c57c7e2d8 100644 --- a/integrations/elasticsearch/ops_agent_metadata.yaml +++ b/integrations/elasticsearch/ops_agent_metadata.yaml @@ -9,7 +9,7 @@ platforms: patch: 0 metrics_minimum_supported_version: major: 2 - minor: 21 + minor: 32 patch: 0 detections: - characteristic_log: @@ -298,3 +298,24 @@ platforms: kind: GAUGE labels: - state + - name: workload.googleapis.com/elasticsearch.cluster.in_flight_fetch + value_type: INT64 + kind: GAUGE + labels: [] + - name: workload.googleapis.com/elasticsearch.cluster.pending_tasks + value_type: INT64 + kind: GAUGE + labels: [] + - name: workload.googleapis.com/elasticsearch.node.cache.count + value_type: INT64 + kind: GAUGE + labels: + - type + - name: workload.googleapis.com/elasticsearch.node.fs.disk.free + value_type: INT64 + kind: GAUGE + labels: [] + - name: workload.googleapis.com/elasticsearch.node.fs.disk.total + value_type: INT64 + kind: GAUGE + labels: [] diff --git 
a/integrations/haproxy/documentation.yaml b/integrations/haproxy/documentation.yaml index 02a519b37f..43b6e6c9a5 100644 --- a/integrations/haproxy/documentation.yaml +++ b/integrations/haproxy/documentation.yaml @@ -1,27 +1,36 @@ -exporter_type: sidecar +exporter_type: included app_name_short: HAProxy app_name: {{app_name_short}} app_site_name: HAProxy app_site_url: https://www.haproxy.com/ -exporter_name: the HAProxy exporter -exporter_pkg_name: haproxy_exporter -exporter_repo_url: https://github.com/prometheus/haproxy_exporter +exporter_name: the PROMEX service for HAProxy +exporter_pkg_name: haproxy_promex +exporter_repo_url: https://github.com/haproxy/haproxy/blob/master/addons/promex/README dashboard_available: true -minimum_exporter_version: v0.13.0 +minimum_exporter_version: "2.4" multiple_dashboards: false dashboard_display_name: {{app_name_short}} Prometheus Overview -config_mods: | +additional_prereq_info: | + {{app_name_short}} exposes Prometheus-format metrics only when it is + [built with the service enabled](https://github.com/haproxy/haproxy/blob/master/addons/promex/README){:class="external"} and an appropriate `frontend` is included in the configuration. + + Most of the official Docker images for versions greater than or equal to 2.4 are built with this service enabled. + + The following example was built referencing [HAProxy Enterprise documentation](https://www.haproxy.com/documentation/hapee/latest/observability/metrics/prometheus/){:class="external"}. + It works with the community edition and can be modified to suit specific needs: +
apiVersion: v1
kind: ConfigMap
metadata:
name: haproxy
data:
haproxy.cfg: |
- + frontend stats
+ ...
+ + frontend prometheus
+ mode http
+ bind *:8404
- + stats enable
- + stats uri /stats
+ + http-request use-service prometheus-exporter if { path /metrics }
+ ...
---
apiVersion: apps/v1
kind: Deployment
@@ -37,18 +46,11 @@ config_mods: |
+ app.kubernetes.io/name: haproxy
spec:
containers:
- + - name: exporter
- + image: quay.io/prometheus/haproxy-exporter:v0.13.0
- + args:
- + - --haproxy.scrape-uri=http://localhost:8404/stats?stats;csv
- + ports:
- + - containerPort: 9101
- + name: prometheus
- name: haproxy
- image: haproxy:2.3
+ image: haproxy:2.8
ports:
- + - containerPort: 8404
- + name: stats
+ - containerPort: 8404
+ name: prometheus
volumeMounts:
- mountPath: /usr/local/etc/haproxy/haproxy.cfg
subPath: haproxy.cfg
@@ -60,10 +62,15 @@ config_mods: |
items:
- key: haproxy.cfg
path: haproxy.cfg
-additional_install_info: |
- The recommended changes in haproxy.cfg
defines a frontend with the "stats enable" directive
- and enables the {{app_name_short}} stats page. This frontend can be scraped by {{exporter_pkg_name}}.
- For more information, see [Exploring the HAProxy Stats page](https://www.haproxy.com/blog/exploring-the-haproxy-stats-page/){:class=external}.
+
+
+ To verify that {{exporter_name}} is emitting metrics on the expected endpoints, do the following:
+ 1. Set up port forwarding by using the following command:
+ + kubectl -n {{namespace_name}} port-forward {{pod_name}} 8404 ++ 2. Access the endpoint `localhost:8404/metrics` by using the browser + or the `curl` utility in another terminal session. podmonitoring_config: | apiVersion: monitoring.googleapis.com/v1 kind: PodMonitoring @@ -81,6 +88,10 @@ podmonitoring_config: | selector: matchLabels: app.kubernetes.io/name: haproxy +additional_podmonitoring_info: | + Ensure that the values of the `port` and `matchLabels` fields match those of the {{app_name_short}} pods you want to monitor. + + {{app_name_short}} exposes metrics from targets defined in the `bind` configuration option. This option requires a user to define the IP address and port to be listened on for scraping metrics. sample_promql_query: up{job="haproxy", cluster="{{cluster_name}}", namespace="{{namespace_name}}"} alerts_config: | apiVersion: monitoring.googleapis.com/v1 diff --git a/integrations/haproxy/prometheus_metadata.yaml b/integrations/haproxy/prometheus_metadata.yaml index 93b2cc36c2..db8e78c6ea 100644 --- a/integrations/haproxy/prometheus_metadata.yaml +++ b/integrations/haproxy/prometheus_metadata.yaml @@ -7,7 +7,7 @@ platforms: exporter_metadata: name: HAProxy Prometheus Exporter doc_url: https://github.com/prometheus/haproxy_exporter - minimum_supported_version: v0.13.0 + minimum_supported_version: "2.4" default_metrics: - name: prometheus.googleapis.com/haproxy_frontend_http_responses_total/counter prometheus_name: haproxy_frontend_http_responses_total diff --git a/integrations/mysql/documentation.yaml b/integrations/mysql/documentation.yaml index cc17e4654e..2fd432c8a5 100644 --- a/integrations/mysql/documentation.yaml +++ b/integrations/mysql/documentation.yaml @@ -11,6 +11,16 @@ minimum_exporter_version: v0.14.0 multiple_dashboards: false dashboard_display_name: {{app_name_short}} Prometheus Overview config_mods: | + + apiVersion: v1 + + kind: ConfigMap + + metadata: + + name: mysql-exporter-config + + data: + + my.cnf: | + + [client] + + 
user=root + + password=password + --- apiVersion: apps/v1 kind: StatefulSet metadata: @@ -27,15 +37,18 @@ config_mods: | spec: containers: + - name: exporter - + image: prom/mysqld-exporter:v0.14.0 - + env: - + - name: DATA_SOURCE_NAME - + value: root:password@(localhost:3306)/ + + image: prom/mysqld-exporter:v0.15.0 + + args: + + - --config.my-cnf=/home/my.cnf + ports: + - containerPort: 9104 + name: prometheus - - image: mysql:5.7 - name: mysql + + volumeMounts: + + - mountPath: /home/my.cnf + + subPath: my.cnf + + name: mysql-exporter-config + - name: mysql + image: mysql:5.7 env: - name: MYSQL_ROOT_PASSWORD value: password @@ -47,7 +60,13 @@ config_mods: | value: sbtest ports: - containerPort: 3306 - name: mysql + + volumes: + + - name: mysql-exporter-config + + configMap: + + name: mysql-exporter-config + + items: + + - key: my.cnf + + path: my.cnf podmonitoring_config: | apiVersion: monitoring.googleapis.com/v1 kind: PodMonitoring @@ -68,7 +87,7 @@ podmonitoring_config: | additional_prereq_info: | For information about creating a least-privileged user, see [Required Grants](https://github.com/prometheus/mysqld_exporter#required-grants){:class=external}. additional_install_info: - Update the `DATA_SOURCE_NAME` environment variable with credentials that work with your {{app_name}} instance. 
+ Update the `--config.my-cnf` argument with a path to a configuration file with credentials that work with your {{app_name}} instance. sample_promql_query: up{job="mysql", cluster="{{cluster_name}}", namespace="{{namespace_name}}"} alerts_config: | apiVersion: monitoring.googleapis.com/v1 diff --git a/integrations/mysql/ops_agent_metadata.yaml b/integrations/mysql/ops_agent_metadata.yaml index c480168352..00cc485e45 100644 --- a/integrations/mysql/ops_agent_metadata.yaml +++ b/integrations/mysql/ops_agent_metadata.yaml @@ -9,7 +9,7 @@ platforms: patch: 0 metrics_minimum_supported_version: major: 2 - minor: 8 + minor: 32 patch: 0 detections: - characteristic_log: @@ -212,6 +212,14 @@ platforms: kind: CUMULATIVE labels: - operation + - name: workload.googleapis.com/mysql.replica.time_behind_source + value_type: INT64 + kind: GAUGE + labels: [] + - name: workload.googleapis.com/mysql.replica.sql_delay + value_type: INT64 + kind: GAUGE + labels: [] - name: workload.googleapis.com/mysql.row_locks value_type: INT64 kind: CUMULATIVE