From fe958a54c86bf52aad48d52c15874b2492412561 Mon Sep 17 00:00:00 2001 From: Steven Swartz Date: Fri, 27 Dec 2024 11:22:47 -0500 Subject: [PATCH] Clean-up JFR files left behind by previous instances of alloy to minimize risk of filling up disk when alloy's in a crash loop --- CHANGELOG.md | 2 ++ internal/component/pyroscope/java/loop.go | 41 +++++++++++++++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 685f799212..9608c8f2f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -70,6 +70,8 @@ Main (unreleased) - Fixed an issue where the `otelcol.processor.interval` could not be used because the debug metrics were not set to default. (@wildum) +- Fixed an issue where `pyroscope.java` did not remove unused JFR files created by previous Alloy instances. (@swar8080) + ### Other changes - Change the stability of the `livedebugging` feature from "experimental" to "generally available". (@wildum) diff --git a/internal/component/pyroscope/java/loop.go b/internal/component/pyroscope/java/loop.go index 6a25e214bb..38254f0fbc 100644 --- a/internal/component/pyroscope/java/loop.go +++ b/internal/component/pyroscope/java/loop.go @@ -7,6 +7,8 @@ import ( _ "embed" "fmt" "os" + "path/filepath" + "regexp" "strconv" "strings" "sync" @@ -23,7 +25,12 @@ import ( gopsutil "github.com/shirou/gopsutil/v3/process" ) -const spyName = "alloy.java" +const ( + spyName = "alloy.java" + processJfrDir = "/tmp" +) + +var jfrFileNamePattern = regexp.MustCompile("^asprof-\\d+-\\d+\\.jfr$") type profilingLoop struct { logger log.Logger @@ -45,6 +52,7 @@ type profilingLoop struct { func newProfilingLoop(pid int, target discovery.Target, logger log.Logger, profiler *asprof.Profiler, output *pyroscope.Fanout, cfg ProfilingConfig) *profilingLoop { ctx, cancel := context.WithCancel(context.Background()) dist, err := profiler.DistributionForProcess(pid) + jfrFileName := fmt.Sprintf("asprof-%d-%d.jfr", os.Getpid(), pid) p := &profilingLoop{ logger: 
log.With(logger, "pid", pid), output: output, @@ -52,7 +60,7 @@ func newProfilingLoop(pid int, target discovery.Target, logger log.Logger, profi target: target, cancel: cancel, dist: dist, - jfrFile: fmt.Sprintf("/tmp/asprof-%d-%d.jfr", os.Getpid(), pid), + jfrFile: filepath.Join(processJfrDir, jfrFileName), cfg: cfg, profiler: profiler, } @@ -63,6 +71,16 @@ func newProfilingLoop(pid int, target discovery.Target, logger log.Logger, profi return p } + p.wg.Add(1) + go func() { + defer p.wg.Done() + // Clean up files that weren't removed by a previous instance of alloy + err := p.cleanupOldJFRFiles(jfrFileName) + if err != nil { + _ = level.Warn(p.logger).Log("msg", "failed cleaning-up java jfr files created by a previous instance of alloy", "err", err) + } + }() + p.wg.Add(1) go func() { defer p.wg.Done() @@ -275,3 +293,22 @@ func (p *profilingLoop) alive() bool { } return err == nil && exists } + +func (p *profilingLoop) cleanupOldJFRFiles(myFileName string) error { + dir := asprof.ProcessPath(processJfrDir, p.pid) + files, err := os.ReadDir(dir) + if err != nil { + return err + } + + for _, file := range files { + if !file.IsDir() && jfrFileNamePattern.MatchString(file.Name()) && file.Name() != myFileName { + _ = level.Debug(p.logger).Log("msg", "deleting jfr file created by previous alloy process", "file", file.Name()) + err := os.Remove(filepath.Join(dir, file.Name())) + if err != nil { + return err + } + } + } + return nil +}