diff --git a/pkg/audit/manager.go b/pkg/audit/manager.go index 37abeeca0dc..9b96656b539 100644 --- a/pkg/audit/manager.go +++ b/pkg/audit/manager.go @@ -496,7 +496,7 @@ func (am *Manager) auditFromCache(ctx context.Context) ([]Result, []error) { if *logStatsAudit { logging.LogStatsEntries( am.opa, - am.log, + am.log.WithValues(logging.EventType, "audit_cache_stats"), resp.StatsEntries, "audit from cache review request stats", ) @@ -615,7 +615,7 @@ func (am *Manager) reviewObjects(ctx context.Context, kind string, folderCount i if *logStatsAudit { logging.LogStatsEntries( am.opa, - am.log, + am.log.WithValues(logging.EventType, "audit_stats"), resp.StatsEntries, "audit review request stats", ) diff --git a/pkg/logging/logging.go b/pkg/logging/logging.go index 93c0ee0d228..d305b92b6d6 100644 --- a/pkg/logging/logging.go +++ b/pkg/logging/logging.go @@ -37,5 +37,9 @@ const ( ) func LogStatsEntries(client *constraintclient.Client, logger logr.Logger, entries []*instrumentation.StatsEntry, msg string) { + if len(entries) == 0 { + return + } + logger.WithValues(ExecutionStats, gkinstr.ToStatsEntriesWithDesc(client, entries)).Info(msg) } diff --git a/pkg/logging/logging_test.go b/pkg/logging/logging_test.go index 25522cb17bb..17ea7f3b120 100644 --- a/pkg/logging/logging_test.go +++ b/pkg/logging/logging_test.go @@ -31,6 +31,7 @@ func Test_LogStatsEntries(t *testing.T) { defer require.NoError(t, zapLogger.Sync()) testLogger := zapr.NewLogger(zapLogger) + // test log lines show up LogStatsEntries( &constraintclient.Client{}, testLogger, @@ -64,6 +65,15 @@ func Test_LogStatsEntries(t *testing.T) { "\"source\":{\"type\":\"someType\",\"value\":\"someValue\"},\"description\":\"%s\"}],"+ "\"labels\":[{\"name\":\"someLabel\",\"value\":\"someLabelValue\"}]}]}\n", instrumentation.UnknownDescription) require.Contains(t, testBuf.String(), expectedLogLine) + + // test that empty stats don't log + LogStatsEntries( + &constraintclient.Client{}, + testLogger, + []*instrumentation.StatsEntry{}, + "this message should not be logged", + ) + require.NotContains(t, testBuf.String(), "this message should not be logged") } //// logging utilities for testing below ///// diff --git a/website/docs/metrics.md b/website/docs/metrics.md index 46ff7f0c7a8..64165ba77c2 100644 --- a/website/docs/metrics.md +++ b/website/docs/metrics.md @@ -1,11 +1,88 @@ --- id: metrics -title: Metrics +title: Metrics & Observability --- +## Observability + +This section covers how to gather more detailed statistics about Gatekeeper's query performance. This can be helpful in diagnosing situations such as identifying a constraint template with a long execution time. Statistics are written to Gatekeeper's stdout logs. + +### Logging Constraint Execution Stats + +- set `--log-stats-audit`. This flag enables logging the stats for the audit process. + +- set `--log-stats-admission`. This flag enables logging the stats for the admission review process. + +#### Example Log Line + +To see how long it takes to review a constraint kind at admission time, enable the `--log-stats-admission` flag and watch the logs for a constraint kind `K8sRequiredLabels`, for example: + +```json +{ + "level": "info", + "ts": 1683692576.9093642, + "logger": "webhook", + "msg": "admission review request stats", + "hookType": "validation", + "process": "admission", + "event_type": "review_response_stats", + "resource_group": "", + "resource_api_version": "v1", + "resource_kind": "Namespace", + "resource_namespace": "", + "request_username": "kubernetes-admin", + "execution_stats": [ + { + "scope": "template", + "statsFor": "K8sRequiredLabels", + "stats": [ + { + "name": "templateRunTimeNS", + "value": 762561, + "source": { + "type": "engine", + "value": "Rego" + }, + "description": "the number of nanoseconds it took to evaluate all constraints for a template" + }, + { + "name": "constraintCount", + "value": 1, + "source": { + "type": "engine", + "value": "Rego" + }, + "description": "the number of constraints that were evaluated for the given constraint kind" + } + ], + "labels": [ + { + "name": "TracingEnabled", + "value": false + }, + { + "name": "PrintEnabled", + "value": false + }, + { + "name": "target", + "value": "admission.k8s.gatekeeper.sh" + } + ] + } + ] +} +``` + +In the excerpt above, notice `templateRunTimeNS` and `constraintCount`. The former indicates the time it takes to evaluate the number of constraints of kind `K8sRequiredLabels`, while the latter surfaces how many such constraints were evaluated for this template. Labels provide additional information about the execution environemnt setup, like whether tracing was enabled (`TraceEnabled`). + +#### Caveats + +The additional log volume from enabling the stats logging can be quite high. +## Metrics Below are the list of metrics provided by Gatekeeper: -## Constraint +### Constraint - Name: `gatekeeper_constraints` @@ -19,7 +96,7 @@ Below are the list of metrics provided by Gatekeeper: Aggregation: `LastValue` -## Constraint Template +### Constraint Template - Name: `gatekeeper_constraint_templates` @@ -51,7 +128,7 @@ Below are the list of metrics provided by Gatekeeper: Aggregation: `Distribution` -## Expansion Template +### Expansion Template - Name: `gatekeeper_expansion_templates` @@ -59,7 +136,7 @@ Below are the list of metrics provided by Gatekeeper: Aggregation: `LastValue` -## Webhook +### Webhook - Name: `gatekeeper_validation_request_count` @@ -103,7 +180,7 @@ Below are the list of metrics provided by Gatekeeper: Aggregation: `Distribution` -## Audit +### Audit - Name: `gatekeeper_violations` @@ -133,7 +210,7 @@ Below are the list of metrics provided by Gatekeeper: Aggregation: `LastValue` -## Mutation +### Mutation - Name: `gatekeeper_mutator_ingestion_count` @@ -175,7 +252,7 @@ Below are the list of metrics provided by Gatekeeper: Aggregation: `Count` -## Sync +### Sync - Name: `gatekeeper_sync` @@ -201,7 +278,7 @@ Below are the list of metrics provided by Gatekeeper: Aggregation: `LastValue` -## Watch +### Watch - Name: `gatekeeper_watch_manager_watched_gvk`