From 61a81089a949b5c1c89380e7873fee00587efb6c Mon Sep 17 00:00:00 2001 From: hc-github-team-nomad-core <82989552+hc-github-team-nomad-core@users.noreply.github.com> Date: Mon, 25 Mar 2024 08:24:25 -0500 Subject: [PATCH] Backport of operator debug: fix pprof interval handling into release/1.6.x (#20215) Co-authored-by: Tim Gross --- .changelog/20206.txt | 3 ++ command/operator_debug.go | 28 +++++++++---------- command/operator_debug_test.go | 14 +++++----- .../content/docs/commands/operator/debug.mdx | 7 ++--- 4 files changed, 27 insertions(+), 25 deletions(-) create mode 100644 .changelog/20206.txt diff --git a/.changelog/20206.txt b/.changelog/20206.txt new file mode 100644 index 000000000000..f0c27a09e150 --- /dev/null +++ b/.changelog/20206.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: Fixed a bug where `operator debug` did not respect the `-pprof-interval` flag and would take only one profile +``` diff --git a/command/operator_debug.go b/command/operator_debug.go index 83a26df7a943..a2a8978897a1 100644 --- a/command/operator_debug.go +++ b/command/operator_debug.go @@ -200,8 +200,8 @@ Debug Options: -pprof-interval= The interval between pprof collections. Set interval equal to - duration to capture a single snapshot. Defaults to 250ms or - -pprof-duration, whichever is less. + duration to capture a single snapshot. Defaults to 30s or + -pprof-duration, whichever is more. -server-id=, Comma separated list of Nomad server names to monitor for logs, API @@ -372,7 +372,7 @@ func (c *OperatorDebugCommand) Run(args []string) int { flags.BoolVar(&allowStale, "stale", false, "") flags.StringVar(&output, "output", "", "") flags.StringVar(&pprofDuration, "pprof-duration", "1s", "") - flags.StringVar(&pprofInterval, "pprof-interval", "250ms", "") + flags.StringVar(&pprofInterval, "pprof-interval", "30s", "") flags.BoolVar(&c.verbose, "verbose", false, "") c.consul = &external{tls: &api.TLSConfig{}} @@ -439,7 +439,7 @@ func (c *OperatorDebugCommand) Run(args []string) int { c.Ui.Error(fmt.Sprintf("Error parsing pprof-interval: %s: %s", pprofInterval, err.Error())) return 1 } - if pi.Seconds() > pd.Seconds() { + if pi.Seconds() < pd.Seconds() { pi = pd } c.pprofInterval = pi @@ -1036,27 +1036,27 @@ func (c *OperatorDebugCommand) collectPprof(path, id string, client *api.Client, // goroutine debug type 1 = legacy text format for human readable output opts.Debug = 1 - c.savePprofProfile(path, "goroutine", opts, client) + c.savePprofProfile(path, "goroutine", opts, client, interval) // goroutine debug type 2 = goroutine stacks in panic format opts.Debug = 2 - c.savePprofProfile(path, "goroutine", opts, client) + c.savePprofProfile(path, "goroutine", opts, client, interval) // Reset to pprof binary format opts.Debug = 0 - c.savePprofProfile(path, "goroutine", opts, client) // Stack traces of all current goroutines - c.savePprofProfile(path, "trace", opts, client) // A trace of execution of the current program - c.savePprofProfile(path, "heap", opts, client) // A sampling of memory allocations of live objects. You can specify the gc GET parameter to run GC before taking the heap sample. - c.savePprofProfile(path, "allocs", opts, client) // A sampling of all past memory allocations - c.savePprofProfile(path, "threadcreate", opts, client) // Stack traces that led to the creation of new OS threads + c.savePprofProfile(path, "goroutine", opts, client, interval) // Stack traces of all current goroutines + c.savePprofProfile(path, "trace", opts, client, interval) // A trace of execution of the current program + c.savePprofProfile(path, "heap", opts, client, interval) // A sampling of memory allocations of live objects. You can specify the gc GET parameter to run GC before taking the heap sample. + c.savePprofProfile(path, "allocs", opts, client, interval) // A sampling of all past memory allocations + c.savePprofProfile(path, "threadcreate", opts, client, interval) // Stack traces that led to the creation of new OS threads } // savePprofProfile retrieves a pprof profile and writes to disk -func (c *OperatorDebugCommand) savePprofProfile(path string, profile string, opts api.PprofOptions, client *api.Client) { - fileName := fmt.Sprintf("%s.prof", profile) +func (c *OperatorDebugCommand) savePprofProfile(path string, profile string, opts api.PprofOptions, client *api.Client, interval int) { + fileName := fmt.Sprintf("%s_%04d.prof", profile, interval) if opts.Debug > 0 { - fileName = fmt.Sprintf("%s-debug%d.txt", profile, opts.Debug) + fileName = fmt.Sprintf("%s-debug%d_%04d.txt", profile, opts.Debug, interval) } bs, err := retrievePprofProfile(profile, opts, client, c.queryOpts()) diff --git a/command/operator_debug_test.go b/command/operator_debug_test.go index 4d612187d95e..feb99f3680b7 100644 --- a/command/operator_debug_test.go +++ b/command/operator_debug_test.go @@ -451,14 +451,14 @@ func TestDebug_CapturedFiles(t *testing.T) { } pprofFiles := []string{ - "allocs.prof", - "goroutine-debug1.txt", - "goroutine-debug2.txt", - "goroutine.prof", - "heap.prof", + "allocs_0000.prof", + "goroutine-debug1_0000.txt", + "goroutine-debug2_0000.txt", + "goroutine_0000.prof", + "heap_0000.prof", "profile_0000.prof", - "threadcreate.prof", - "trace.prof", + "threadcreate_0000.prof", + "trace_0000.prof", } clientFiles := []string{ diff --git a/website/content/docs/commands/operator/debug.mdx b/website/content/docs/commands/operator/debug.mdx index 362bdfe10602..8ffde93526f9 100644 --- a/website/content/docs/commands/operator/debug.mdx +++ b/website/content/docs/commands/operator/debug.mdx @@ -70,10 +70,9 @@ true. - `-pprof-duration=`: Duration for pprof collection. Defaults to 1s or `-duration`, whichever is less. -- `-pprof-interval=`: The interval between pprof - collections. Set interval equal to duration to capture a single - snapshot. Defaults to 250ms or `-pprof-duration`, whichever is - less. +- `-pprof-interval=30s`: The interval between pprof collections. Set interval + equal to pprof duration to capture a single snapshot. Defaults to 30s or + `-pprof-duration`, whichever is more. - `-server-id=,`: Comma separated list of Nomad server names to monitor for logs, API outputs, and pprof profiles. Accepts server names, "leader", or