From d2479b5e47f664acb03f24a4dfcb12b2dd35f4e3 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 18 Nov 2024 17:05:52 +0100 Subject: [PATCH 01/16] Common temporary location manager for profiling product --- .../controller/openjdk/OpenJdkController.java | 109 ++-------- .../controller/TempLocationManager.java | 201 ++++++++++++++++++ .../controller/TempLocationManagerTest.java | 83 ++++++++ .../profiling/ddprof/DatadogProfiler.java | 19 +- .../profiling/ddprof/JavaProfilerLoader.java | 18 +- .../trace/api/config/ProfilingConfig.java | 12 +- 6 files changed, 343 insertions(+), 99 deletions(-) create mode 100644 dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java create mode 100644 dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java diff --git a/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkController.java b/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkController.java index dbe3c15bb84..c1ec6e12844 100644 --- a/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkController.java +++ b/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkController.java @@ -18,8 +18,6 @@ import static com.datadog.profiling.controller.ProfilingSupport.*; import static com.datadog.profiling.controller.ProfilingSupport.isObjectCountParallelized; import static datadog.trace.api.Platform.isJavaVersionAtLeast; -import static datadog.trace.api.config.ProfilingConfig.PROFILING_DEBUG_CLEANUP_REPO; -import static datadog.trace.api.config.ProfilingConfig.PROFILING_DEBUG_CLEANUP_REPO_DEFAULT; import static datadog.trace.api.config.ProfilingConfig.PROFILING_HEAP_HISTOGRAM_ENABLED; import static datadog.trace.api.config.ProfilingConfig.PROFILING_HEAP_HISTOGRAM_ENABLED_DEFAULT; import static datadog.trace.api.config.ProfilingConfig.PROFILING_HEAP_HISTOGRAM_MODE; @@ -33,6 +31,7 @@ import com.datadog.profiling.controller.ConfigurationException; import com.datadog.profiling.controller.Controller; import com.datadog.profiling.controller.ControllerContext; +import com.datadog.profiling.controller.TempLocationManager; import com.datadog.profiling.controller.jfr.JFRAccess; import com.datadog.profiling.controller.jfr.JfpUtils; import com.datadog.profiling.controller.openjdk.events.AvailableProcessorCoresEvent; @@ -42,19 +41,13 @@ import datadog.trace.bootstrap.config.provider.ConfigProvider; import datadog.trace.bootstrap.instrumentation.jfr.backpressure.BackpressureProfiling; import datadog.trace.bootstrap.instrumentation.jfr.exceptions.ExceptionProfiling; -import datadog.trace.util.PidHelper; import de.thetaphi.forbiddenapis.SuppressForbidden; import java.io.IOException; -import java.nio.file.FileVisitResult; -import java.nio.file.FileVisitor; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.attribute.BasicFileAttributes; import java.time.Duration; import java.util.Collections; import java.util.Map; -import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -92,14 +85,8 @@ public OpenJdkController(final ConfigProvider configProvider) // configure the JFR stackdepth before we try to load any JFR classes int requestedStackDepth = getConfiguredStackDepth(configProvider); this.jfrStackDepthApplied = JFRAccess.instance().setStackDepth(requestedStackDepth); - boolean shouldCleanupJfrRepository = - configProvider.getBoolean( - PROFILING_DEBUG_CLEANUP_REPO, PROFILING_DEBUG_CLEANUP_REPO_DEFAULT); - String jfrRepositoryBase = null; - if (shouldCleanupJfrRepository) { - jfrRepositoryBase = getJfrRepositoryBase(configProvider); - JFRAccess.instance().setBaseLocation(jfrRepositoryBase + "/pid_" + PidHelper.getPid()); - } + String jfrRepositoryBase = getJfrRepositoryBase(configProvider); + JFRAccess.instance().setBaseLocation(jfrRepositoryBase); // Make sure we can load JFR classes before declaring that we have successfully created // factory and can use it. Class.forName("jdk.jfr.Recording"); @@ -112,10 +99,6 @@ public OpenJdkController(final ConfigProvider configProvider) Map recordingSettings; try { - if (shouldCleanupJfrRepository) { - cleanupJfrRepositories(Paths.get(jfrRepositoryBase)); - } - recordingSettings = JfpUtils.readNamedJfpResource( ultraMinimal ? JfpUtils.SAFEPOINTS_JFP : JfpUtils.DEFAULT_JFP); @@ -270,21 +253,27 @@ && isEventEnabled(recordingSettings, "jdk.NativeMethodSample")) { } private static String getJfrRepositoryBase(ConfigProvider configProvider) { - return configProvider.getString( - ProfilingConfig.PROFILING_JFR_REPOSITORY_BASE, - ProfilingConfig.PROFILING_JFR_REPOSITORY_BASE_DEFAULT); - } - - private static void cleanupJfrRepositories(Path repositoryBase) { - try { - Files.walkFileTree(repositoryBase, new JfrCleanupVisitor(repositoryBase)); - } catch (IOException e) { - if (log.isDebugEnabled()) { - log.warn("Unable to cleanup old JFR repositories", e); - } else { - log.warn("Unable to cleanup old JFR repositories"); + String legacy = + configProvider.getString( + ProfilingConfig.PROFILING_JFR_REPOSITORY_BASE, + ProfilingConfig.PROFILING_JFR_REPOSITORY_BASE_DEFAULT); + if (!legacy.equals(ProfilingConfig.PROFILING_JFR_REPOSITORY_BASE_DEFAULT)) { + log.warn( + "The configuration key {} is deprecated. Please use {} instead.", + ProfilingConfig.PROFILING_JFR_REPOSITORY_BASE, + ProfilingConfig.PROFILING_TEMP_DIR); + } + Path repositoryPath = TempLocationManager.getInstance().getTempDir().resolve("jfr"); + if (!Files.exists(repositoryPath)) { + try { + Files.createDirectories(repositoryPath); + } catch (IOException e) { + log.error("Failed to create JFR repository directory: {}", repositoryPath, e); + throw new IllegalStateException( + "Failed to create JFR repository directory: " + repositoryPath, e); } } + return repositoryPath.toString(); } int getMaxSize() { @@ -331,58 +320,4 @@ private int getConfiguredStackDepth(ConfigProvider configProvider) { return configProvider.getInteger( ProfilingConfig.PROFILING_STACKDEPTH, ProfilingConfig.PROFILING_STACKDEPTH_DEFAULT); } - - private static class JfrCleanupVisitor implements FileVisitor { - private boolean shouldClean = false; - - private final Path root; - private final Set pidSet = PidHelper.getJavaPids(); - - JfrCleanupVisitor(Path root) { - this.root = root; - } - - @Override - public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) - throws IOException { - if (dir.equals(root)) { - return FileVisitResult.CONTINUE; - } - String fileName = dir.getFileName().toString(); - // the JFR repository directories are under /pid_ - String pid = fileName.startsWith("pid_") ? fileName.substring(4) : null; - shouldClean |= pid != null && !pidSet.contains(pid); - if (shouldClean) { - log.debug("Cleaning JFR repository under {}", dir); - } - return shouldClean ? FileVisitResult.CONTINUE : FileVisitResult.SKIP_SUBTREE; - } - - @Override - public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { - if (file.toString().toLowerCase().endsWith(".jfr")) { - Files.delete(file); - } - return FileVisitResult.CONTINUE; - } - - @Override - public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { - if (log.isDebugEnabled() && file.toString().toLowerCase().endsWith(".jfr")) { - log.debug("Failed to delete file {}", file, exc); - } - return FileVisitResult.CONTINUE; - } - - @Override - public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { - if (shouldClean) { - Files.delete(dir); - String fileName = dir.getFileName().toString(); - // reset the flag only if we are done cleaning the top-level directory - shouldClean = !fileName.startsWith("pid_"); - } - return FileVisitResult.CONTINUE; - } - } } diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java new file mode 100644 index 00000000000..5915655f8f6 --- /dev/null +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java @@ -0,0 +1,201 @@ +package com.datadog.profiling.controller; + +import datadog.trace.api.config.ProfilingConfig; +import datadog.trace.bootstrap.config.provider.ConfigProvider; +import datadog.trace.util.PidHelper; +import java.io.IOException; +import java.nio.file.FileVisitResult; +import java.nio.file.FileVisitor; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; +import java.nio.file.attribute.PosixFilePermissions; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A manager class for temporary locations used by the profiling product. The temporary location is + * keyed by the process ID and allows for cleanup of orphaned temporary files on startup by querying + * the list of running Java processes and cleaning up any temporary locations that do not correspond + * to a running Java process. Also, the temporary location is cleaned up on shutdown. + */ +public final class TempLocationManager { + private static final Logger log = LoggerFactory.getLogger(TempLocationManager.class); + + private static final class SingletonHolder { + private static final TempLocationManager INSTANCE = new TempLocationManager(); + } + + private class CleanupVisitor implements FileVisitor { + private boolean shouldClean = false; + + private final Set pidSet = PidHelper.getJavaPids(); + + private final boolean cleanSelf; + + CleanupVisitor(boolean cleanSelf) { + this.cleanSelf = cleanSelf; + } + + @Override + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) + throws IOException { + if (dir.equals(baseTempDir)) { + return FileVisitResult.CONTINUE; + } + String fileName = dir.getFileName().toString(); + // the JFR repository directories are under /pid_ + String pid = fileName.startsWith("pid_") ? fileName.substring(4) : null; + boolean isSelfPid = pid != null && pid.equals(PidHelper.getPid()); + shouldClean |= (cleanSelf && isSelfPid) || (!cleanSelf && !pidSet.contains(pid)); + if (shouldClean) { + log.debug("Cleaning temporary location {}", dir); + } + return shouldClean ? FileVisitResult.CONTINUE : FileVisitResult.SKIP_SUBTREE; + } + + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + Files.delete(file); + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { + if (log.isDebugEnabled()) { + log.debug("Failed to delete file {}", file, exc); + } + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + if (shouldClean) { + Files.delete(dir); + String fileName = dir.getFileName().toString(); + // reset the flag only if we are done cleaning the top-level directory + shouldClean = !fileName.startsWith("pid_"); + } + return FileVisitResult.CONTINUE; + } + } + + private final Path baseTempDir; + private final Path tempDir; + + private final CompletableFuture cleanupTask; + + /** + * Get the singleton instance of the TempLocationManager. It will run the cleanup task in the + * background. + * + * @return the singleton instance of the TempLocationManager + */ + public static TempLocationManager getInstance() { + return getInstance(false); + } + + /** + * Get the singleton instance of the TempLocationManager. + * + * @param waitForCleanup if true, wait for the cleanup task to finish before returning + * @return the singleton instance of the TempLocationManager + */ + static TempLocationManager getInstance(boolean waitForCleanup) { + TempLocationManager instance = SingletonHolder.INSTANCE; + if (waitForCleanup) { + instance.waitForCleanup(); + } + return instance; + } + + private TempLocationManager() { + this(ConfigProvider.getInstance()); + } + + TempLocationManager(ConfigProvider configProvider) { + Path configuredTempDir = + Paths.get( + configProvider.getString( + ProfilingConfig.PROFILING_TEMP_DIR, ProfilingConfig.PROFILING_TEMP_DIR_DEFAULT)); + if (!Files.exists(configuredTempDir)) { + log.warn( + "Base temp directory, as defined in '" + + ProfilingConfig.PROFILING_TEMP_DIR + + "' does not exist: " + + configuredTempDir); + throw new IllegalStateException( + "Base temp directory, as defined in '" + + ProfilingConfig.PROFILING_TEMP_DIR + + "' does not exist: " + + configuredTempDir); + } + + String pid = PidHelper.getPid(); + + baseTempDir = configuredTempDir.resolve("ddprof"); + tempDir = baseTempDir.resolve("pid_" + pid); + cleanupTask = CompletableFuture.runAsync(() -> cleanup(false)); + + Runtime.getRuntime() + .addShutdownHook( + new Thread( + () -> { + try { + cleanupTask.join(); + } finally { + cleanup(true); + } + }, + "Temp Location Manager Cleanup")); + } + + /** + * Get the temporary directory for the current process. + * + * @return the temporary directory for the current process + */ + public Path getTempDir() { + return getTempDir(true); + } + + /** + * Get the temporary directory for the current process. + * + * @param create if true, create the directory if it does not exist + * @return the temporary directory for the current process + * @throws IllegalStateException if the directory could not be created + */ + public Path getTempDir(boolean create) { + if (create && !Files.exists(tempDir)) { + try { + Files.createDirectories( + tempDir, + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); + } catch (Exception e) { + log.warn("Failed to create temp directory: {}", tempDir, e); + throw new IllegalStateException("Failed to create temp directory: " + tempDir, e); + } + } + return tempDir; + } + + void cleanup(boolean cleanSelf) { + try { + Files.walkFileTree(baseTempDir, new CleanupVisitor(cleanSelf)); + } catch (IOException e) { + if (log.isDebugEnabled()) { + log.warn("Unable to cleanup temp location {}", baseTempDir, e); + } else { + log.warn("Unable to cleanup temp location {}", baseTempDir); + } + } + } + + private void waitForCleanup() { + cleanupTask.join(); + } +} diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java new file mode 100644 index 00000000000..df89ca881a5 --- /dev/null +++ b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java @@ -0,0 +1,83 @@ +package com.datadog.profiling.controller; + +import static org.junit.jupiter.api.Assertions.*; + +import datadog.trace.api.config.ProfilingConfig; +import datadog.trace.bootstrap.config.provider.ConfigProvider; +import datadog.trace.util.PidHelper; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.PosixFilePermissions; +import java.util.Properties; +import org.junit.jupiter.api.Test; + +public class TempLocationManagerTest { + @Test + void testDefault() throws Exception { + TempLocationManager tempLocationManager = TempLocationManager.getInstance(true); + Path tempDir = tempLocationManager.getTempDir(); + assertNotNull(tempDir); + assertTrue(Files.exists(tempDir)); + assertTrue(Files.isDirectory(tempDir)); + assertTrue(Files.isWritable(tempDir)); + assertTrue(Files.isReadable(tempDir)); + assertTrue(Files.isExecutable(tempDir)); + assertTrue(tempDir.endsWith("pid_" + PidHelper.getPid())); + + // fake temp location + Path fakeTempDir = tempDir.getParent().resolve("pid_00000"); + Files.createDirectories(fakeTempDir); + tempLocationManager.cleanup(false); + // fake temp location should be deleted + // real temp location should be kept + assertFalse(Files.exists(fakeTempDir)); + assertTrue(Files.exists(tempDir)); + } + + @Test + void testFromConfig() throws Exception { + Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), "test1"); + try { + Files.createDirectories(myDir); + Properties props = new Properties(); + props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); + ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); + TempLocationManager tempLocationManager = new TempLocationManager(configProvider); + Path tempDir = tempLocationManager.getTempDir(); + assertNotNull(tempDir); + assertTrue(tempDir.toString().startsWith(myDir.toString())); + } finally { + // make sure to delete the test directory + Files.delete(myDir); + } + } + + @Test + void testFromConfigInvalid() { + Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), "test2"); + // do not create the directory - it should trigger an exception + Properties props = new Properties(); + props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); + ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); + assertThrows(IllegalStateException.class, () -> new TempLocationManager(configProvider)); + } + + @Test + void testFromConfigNotWritable() throws Exception { + Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), "test3"); + try { + Files.createDirectories( + myDir, + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("r-x------"))); + Properties props = new Properties(); + props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); + ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); + TempLocationManager tempLocationManager = new TempLocationManager(configProvider); + assertThrows(IllegalStateException.class, tempLocationManager::getTempDir); + } finally { + // make sure to delete the test directory + Files.delete(myDir); + } + } +} diff --git a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java index c4c49d82731..5c9fbba38b7 100644 --- a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java +++ b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java @@ -33,6 +33,7 @@ import static datadog.trace.api.config.ProfilingConfig.PROFILING_QUEUEING_TIME_THRESHOLD_MILLIS_DEFAULT; import com.datadog.profiling.controller.OngoingRecording; +import com.datadog.profiling.controller.TempLocationManager; import com.datadog.profiling.utils.ProfilingMode; import com.datadoghq.profiler.ContextSetter; import com.datadoghq.profiler.JavaProfiler; @@ -43,6 +44,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.attribute.PosixFilePermissions; import java.util.ArrayList; import java.util.EnumSet; import java.util.List; @@ -106,6 +108,8 @@ public static DatadogProfiler newInstance(ConfigProvider configProvider) { private final long queueTimeThresholdMillis; + private final Path recordingsPath; + private DatadogProfiler(ConfigProvider configProvider) { this(configProvider, getContextAttributes(configProvider)); } @@ -148,6 +152,19 @@ private DatadogProfiler(ConfigProvider configProvider) { configProvider.getLong( PROFILING_QUEUEING_TIME_THRESHOLD_MILLIS, PROFILING_QUEUEING_TIME_THRESHOLD_MILLIS_DEFAULT); + + this.recordingsPath = TempLocationManager.getInstance().getTempDir().resolve("recordings"); + if (!Files.exists(recordingsPath)) { + try { + Files.createDirectories( + recordingsPath, + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); + } catch (IOException e) { + log.warn("Failed to create recordings directory: {}", recordingsPath, e); + throw new IllegalStateException( + "Failed to create recordings directory: " + recordingsPath, e); + } + } } void addThread() { @@ -212,7 +229,7 @@ String executeProfilerCmd(String cmd) throws IOException { Path newRecording() throws IOException, IllegalStateException { if (recordingFlag.compareAndSet(false, true)) { - Path recFile = Files.createTempFile("dd-profiler-", ".jfr"); + Path recFile = Files.createTempFile(recordingsPath, "dd-profiler-", ".jfr"); String cmd = cmdStartProfiling(recFile); try { String rslt = executeProfilerCmd(cmd); diff --git a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/JavaProfilerLoader.java b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/JavaProfilerLoader.java index a2b24925b54..654bd63f904 100644 --- a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/JavaProfilerLoader.java +++ b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/JavaProfilerLoader.java @@ -1,8 +1,12 @@ package com.datadog.profiling.ddprof; +import com.datadog.profiling.controller.TempLocationManager; import com.datadoghq.profiler.JavaProfiler; import datadog.trace.api.config.ProfilingConfig; import datadog.trace.bootstrap.config.provider.ConfigProvider; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.PosixFilePermissions; /** * Only loading the profiler itself needs to be protected as a singleton. Separating the loading @@ -17,12 +21,20 @@ public class JavaProfilerLoader { Throwable reasonNotLoaded = null; try { ConfigProvider configProvider = ConfigProvider.getInstance(); + String scratch = configProvider.getString(ProfilingConfig.PROFILING_DATADOG_PROFILER_SCRATCH); + if (scratch == null) { + Path scratchPath = TempLocationManager.getInstance().getTempDir().resolve("scratch"); + if (!Files.exists(scratchPath)) { + Files.createDirectories( + scratchPath, + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxr-xr-x"))); + } + scratch = scratchPath.toString(); + } profiler = JavaProfiler.getInstance( configProvider.getString(ProfilingConfig.PROFILING_DATADOG_PROFILER_LIBPATH), - configProvider.getString( - ProfilingConfig.PROFILING_DATADOG_PROFILER_SCRATCH, - ProfilingConfig.PROFILING_DATADOG_PROFILER_SCRATCH_DEFAULT)); + scratch); // sanity test - force load Datadog profiler to catch it not being available early profiler.execute("status"); } catch (Throwable t) { diff --git a/dd-trace-api/src/main/java/datadog/trace/api/config/ProfilingConfig.java b/dd-trace-api/src/main/java/datadog/trace/api/config/ProfilingConfig.java index 4d5c10f7fb1..28f7012edd3 100644 --- a/dd-trace-api/src/main/java/datadog/trace/api/config/ProfilingConfig.java +++ b/dd-trace-api/src/main/java/datadog/trace/api/config/ProfilingConfig.java @@ -72,9 +72,6 @@ public final class ProfilingConfig { public static final String PROFILING_JFR_REPOSITORY_BASE_DEFAULT = System.getProperty("java.io.tmpdir") + "/dd/jfr"; - public static final String PROFILING_JFR_REPOSITORY_CLEANUP = "profiling.jfr.repository.cleanup"; - public static final boolean PROFILING_JFR_REPOSITORY_CLEANUP_DEFAULT = true; - public static final String PROFILING_DATADOG_PROFILER_ENABLED = "profiling.ddprof.enabled"; public static final String PROFILING_DIRECT_ALLOCATION_ENABLED = @@ -87,8 +84,7 @@ public final class ProfilingConfig { // Java profiler lib needs to be extracted from JAR and placed into the scratch location // By default the scratch is the os temp directory but can be overridden by user public static final String PROFILING_DATADOG_PROFILER_SCRATCH = "profiling.ddprof.scratch"; - public static final String PROFILING_DATADOG_PROFILER_SCRATCH_DEFAULT = - System.getProperty("java.io.tmpdir"); + public static final String PROFILING_DATADOG_PROFILER_LIBPATH = "profiling.ddprof.debug.lib"; public static final String PROFILING_DATADOG_PROFILER_ALLOC_ENABLED = "profiling.ddprof.alloc.enabled"; @@ -182,6 +178,9 @@ public final class ProfilingConfig { public static final String PROFILING_UPLOAD_SUMMARY_ON_413 = "profiling.upload.summary-on-413"; public static final boolean PROFILING_UPLOAD_SUMMARY_ON_413_DEFAULT = false; + public static final String PROFILING_TEMP_DIR = "profiling.tempdir"; + public static final String PROFILING_TEMP_DIR_DEFAULT = System.getProperty("java.io.tmpdir"); + // Not intended for production use public static final String PROFILING_AGENTLESS = "profiling.agentless"; public static final boolean PROFILING_AGENTLESS_DEFAULT = false; @@ -192,9 +191,6 @@ public final class ProfilingConfig { public static final String PROFILING_DEBUG_DUMP_PATH = "profiling.debug.dump_path"; public static final String PROFILING_DEBUG_JFR_DISABLED = "profiling.debug.jfr.disabled"; - public static final String PROFILING_DEBUG_CLEANUP_REPO = "profiling.debug.cleanup.jfr.repo"; - public static final boolean PROFILING_DEBUG_CLEANUP_REPO_DEFAULT = false; - public static final String PROFILING_CONTEXT_ATTRIBUTES = "profiling.context.attributes"; public static final String PROFILING_CONTEXT_ATTRIBUTES_SPAN_NAME_ENABLED = From 06ae694fa7369b345d7c1b3a5e41805eb7203f4a Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 18 Nov 2024 17:13:48 +0100 Subject: [PATCH 02/16] Update dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java Co-authored-by: datadog-datadog-prod-us1[bot] <88084959+datadog-datadog-prod-us1[bot]@users.noreply.github.com> --- .../com/datadog/profiling/controller/TempLocationManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java index 5915655f8f6..f35cbf0aefc 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java @@ -30,7 +30,7 @@ private static final class SingletonHolder { } private class CleanupVisitor implements FileVisitor { - private boolean shouldClean = false; + private boolean shouldClean; private final Set pidSet = PidHelper.getJavaPids(); From ff48e468c87c34cdb5059285a4848a9e64410415 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 18 Nov 2024 17:35:08 +0100 Subject: [PATCH 03/16] Fix tests --- .../controller/TempLocationManagerTest.java | 42 +++++++------------ 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java index df89ca881a5..4476859bc01 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java @@ -38,19 +38,14 @@ void testDefault() throws Exception { @Test void testFromConfig() throws Exception { Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), "test1"); - try { - Files.createDirectories(myDir); - Properties props = new Properties(); - props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); - ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); - TempLocationManager tempLocationManager = new TempLocationManager(configProvider); - Path tempDir = tempLocationManager.getTempDir(); - assertNotNull(tempDir); - assertTrue(tempDir.toString().startsWith(myDir.toString())); - } finally { - // make sure to delete the test directory - Files.delete(myDir); - } + Files.createDirectories(myDir); + Properties props = new Properties(); + props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); + ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); + TempLocationManager tempLocationManager = new TempLocationManager(configProvider); + Path tempDir = tempLocationManager.getTempDir(); + assertNotNull(tempDir); + assertTrue(tempDir.toString().startsWith(myDir.toString())); } @Test @@ -66,18 +61,13 @@ void testFromConfigInvalid() { @Test void testFromConfigNotWritable() throws Exception { Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), "test3"); - try { - Files.createDirectories( - myDir, - PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("r-x------"))); - Properties props = new Properties(); - props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); - ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); - TempLocationManager tempLocationManager = new TempLocationManager(configProvider); - assertThrows(IllegalStateException.class, tempLocationManager::getTempDir); - } finally { - // make sure to delete the test directory - Files.delete(myDir); - } + Files.createDirectories( + myDir, + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("r-x------"))); + Properties props = new Properties(); + props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); + ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); + TempLocationManager tempLocationManager = new TempLocationManager(configProvider); + assertThrows(IllegalStateException.class, tempLocationManager::getTempDir); } } From 92d1ecf1095b560f83e7a3bce5ea6263cba8c433 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 18 Nov 2024 17:56:50 +0100 Subject: [PATCH 04/16] Spotless --- .../datadog/profiling/controller/TempLocationManagerTest.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java index 4476859bc01..214191b106f 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java @@ -62,8 +62,7 @@ void testFromConfigInvalid() { void testFromConfigNotWritable() throws Exception { Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), "test3"); Files.createDirectories( - myDir, - PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("r-x------"))); + myDir, PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("r-x------"))); Properties props = new Properties(); props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); From 6e60ed61831359991741f61812dfd0c136f5f7a4 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 18 Nov 2024 18:41:14 +0100 Subject: [PATCH 05/16] Fix tests --- .../controller/TempLocationManagerTest.java | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java index 214191b106f..55c958b858d 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java @@ -24,15 +24,6 @@ void testDefault() throws Exception { assertTrue(Files.isReadable(tempDir)); assertTrue(Files.isExecutable(tempDir)); assertTrue(tempDir.endsWith("pid_" + PidHelper.getPid())); - - // fake temp location - Path fakeTempDir = tempDir.getParent().resolve("pid_00000"); - Files.createDirectories(fakeTempDir); - tempLocationManager.cleanup(false); - // fake temp location should be deleted - // real temp location should be kept - assertFalse(Files.exists(fakeTempDir)); - assertTrue(Files.exists(tempDir)); } @Test @@ -69,4 +60,25 @@ void testFromConfigNotWritable() throws Exception { TempLocationManager tempLocationManager = new TempLocationManager(configProvider); assertThrows(IllegalStateException.class, tempLocationManager::getTempDir); } + + @Test + void testCleanup() throws Exception { + Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), "test4"); + Files.createDirectories(myDir); + Properties props = new Properties(); + props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); + ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); + TempLocationManager tempLocationManager = new TempLocationManager(configProvider); + Path tempDir = tempLocationManager.getTempDir(); + assertNotNull(tempDir); + + // fake temp location + Path fakeTempDir = tempDir.getParent().resolve("pid_00000"); + Files.createDirectories(fakeTempDir); + tempLocationManager.cleanup(false); + // fake temp location should be deleted + // real temp location should be kept + assertFalse(Files.exists(fakeTempDir)); + assertTrue(Files.exists(tempDir)); + } } From f65e89c9c78ee5007dd44f881e913527c0b5cb48 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 19 Nov 2024 16:38:38 +0100 Subject: [PATCH 06/16] Allow for parallel runs of TempLocationManagerTest --- .../profiling/controller/TempLocationManagerTest.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java index 55c958b858d..d63f2f0b9f2 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java @@ -10,6 +10,8 @@ import java.nio.file.Paths; import java.nio.file.attribute.PosixFilePermissions; import java.util.Properties; +import java.util.UUID; + import org.junit.jupiter.api.Test; public class TempLocationManagerTest { @@ -28,7 +30,7 @@ void testDefault() throws Exception { @Test void testFromConfig() throws Exception { - Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), "test1"); + Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString()); Files.createDirectories(myDir); Properties props = new Properties(); props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); @@ -41,7 +43,7 @@ void testFromConfig() throws Exception { @Test void testFromConfigInvalid() { - Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), "test2"); + Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString()); // do not create the directory - it should trigger an exception Properties props = new Properties(); props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); @@ -51,7 +53,7 @@ void testFromConfigInvalid() { @Test void testFromConfigNotWritable() throws Exception { - Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), "test3"); + Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString()); Files.createDirectories( myDir, PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("r-x------"))); Properties props = new Properties(); @@ -63,7 +65,7 @@ void testFromConfigNotWritable() throws Exception { @Test void testCleanup() throws Exception { - Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), "test4"); + Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString()); Files.createDirectories(myDir); Properties props = new Properties(); props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); From e4d56d25173baa5499b116509709f71b2d2df43d Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 19 Nov 2024 16:57:14 +0100 Subject: [PATCH 07/16] More standard temp randomization --- .../controller/TempLocationManagerTest.java | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java index d63f2f0b9f2..e0c35d97b4a 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java @@ -11,7 +11,6 @@ import java.nio.file.attribute.PosixFilePermissions; import java.util.Properties; import java.util.UUID; - import org.junit.jupiter.api.Test; public class TempLocationManagerTest { @@ -30,8 +29,10 @@ void testDefault() throws Exception { @Test void testFromConfig() throws Exception { - Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString()); - Files.createDirectories(myDir); + Path myDir = + Files.createTempDirectory( + "ddprof-test-", + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); Properties props = new Properties(); props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); @@ -53,9 +54,10 @@ void testFromConfigInvalid() { @Test void testFromConfigNotWritable() throws Exception { - Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString()); - Files.createDirectories( - myDir, PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("r-x------"))); + Path myDir = + Files.createTempDirectory( + "ddprof-test-", + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("r-x------"))); Properties props = new Properties(); props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); @@ -65,8 +67,10 @@ void testFromConfigNotWritable() throws Exception { @Test void testCleanup() throws Exception { - Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString()); - Files.createDirectories(myDir); + Path myDir = + Files.createTempDirectory( + "ddprof-test-", + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); Properties props = new Properties(); props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); From fe0aecadd7b53b394fee179717dbb46da664daeb Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 19 Nov 2024 17:59:57 +0100 Subject: [PATCH 08/16] Debug disabling cleanup on startup --- .../controller/TempLocationManager.java | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java index f35cbf0aefc..ed452595b7e 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java @@ -140,17 +140,17 @@ private TempLocationManager() { tempDir = baseTempDir.resolve("pid_" + pid); cleanupTask = CompletableFuture.runAsync(() -> cleanup(false)); - Runtime.getRuntime() - .addShutdownHook( - new Thread( - () -> { - try { - cleanupTask.join(); - } finally { - cleanup(true); - } - }, - "Temp Location Manager Cleanup")); + // Runtime.getRuntime() + // .addShutdownHook( + // new Thread( + // () -> { + // try { + // cleanupTask.join(); + // } finally { + // cleanup(true); + // } + // }, + // "Temp Location Manager Cleanup")); } /** From 9f2cbf119933a2a69d6d422f45f8130596ede810 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 19 Nov 2024 19:08:55 +0100 Subject: [PATCH 09/16] Try to avoid racy cleanups --- .../controller/TempLocationManager.java | 45 ++++++++++++++----- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java index ed452595b7e..970cc79d44e 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java @@ -11,6 +11,8 @@ import java.nio.file.Paths; import java.nio.file.attribute.BasicFileAttributes; import java.nio.file.attribute.PosixFilePermissions; +import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.Set; import java.util.concurrent.CompletableFuture; import org.slf4j.Logger; @@ -35,9 +37,11 @@ private class CleanupVisitor implements FileVisitor { private final Set pidSet = PidHelper.getJavaPids(); private final boolean cleanSelf; + private final Instant cutoff; CleanupVisitor(boolean cleanSelf) { this.cleanSelf = cleanSelf; + this.cutoff = Instant.now().minus(cutoffSeconds, ChronoUnit.SECONDS); } @Override @@ -59,6 +63,9 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + if (Files.getLastModifiedTime(file).toInstant().isAfter(cutoff)) { + return FileVisitResult.SKIP_SUBTREE; + } Files.delete(file); return FileVisitResult.CONTINUE; } @@ -85,6 +92,7 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx private final Path baseTempDir; private final Path tempDir; + private final long cutoffSeconds; private final CompletableFuture cleanupTask; @@ -117,6 +125,19 @@ private TempLocationManager() { } TempLocationManager(ConfigProvider configProvider) { + // In order to avoid racy attempts to clean up files which are currently being processed in a + // JVM which is being shut down (the JVMs far in the shutdown routine may not be reported by + // 'jps' but still can be eg. processing JFR chunks) we will not clean up any files not older + // than '2 * PROFILING_UPLOAD_PERIOD' seconds. + // The reasoning is that even if the file is created immediately at JVM startup once it is + // 'PROFILING_UPLOAD_PERIOD' seconds old it gets processed to upload the final profile data and + // this processing will not take longer than another `PROFILING_UPLOAD_PERIOD' seconds. + // This is just an assumption but as long as the profiled application is working normally (eg. + // OS is not stalling) this assumption will hold. + cutoffSeconds = + configProvider.getLong( + ProfilingConfig.PROFILING_UPLOAD_PERIOD, + ProfilingConfig.PROFILING_UPLOAD_PERIOD_DEFAULT); Path configuredTempDir = Paths.get( configProvider.getString( @@ -140,21 +161,21 @@ private TempLocationManager() { tempDir = baseTempDir.resolve("pid_" + pid); cleanupTask = CompletableFuture.runAsync(() -> cleanup(false)); - // Runtime.getRuntime() - // .addShutdownHook( - // new Thread( - // () -> { - // try { - // cleanupTask.join(); - // } finally { - // cleanup(true); - // } - // }, - // "Temp Location Manager Cleanup")); + Runtime.getRuntime() + .addShutdownHook( + new Thread( + () -> { + try { + cleanupTask.join(); + } finally { + cleanup(true); + } + }, + "Temp Location Manager Cleanup")); } /** - * Get the temporary directory for the current process. + * Get the temporary directory for the current process. The directory will be removed at JVM exit. * * @return the temporary directory for the current process */ From 3c6f3ed42af7348b5f7c588ff8ff17d728a50588 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 19 Nov 2024 19:50:04 +0100 Subject: [PATCH 10/16] Improve the temp location manager api --- .../controller/TempLocationManager.java | 28 ++++++++++--- .../controller/TempLocationManagerTest.java | 40 ++++++++++++++----- 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java index 970cc79d44e..d2204cf2e02 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java @@ -158,6 +158,8 @@ private TempLocationManager() { String pid = PidHelper.getPid(); baseTempDir = configuredTempDir.resolve("ddprof"); + baseTempDir.toFile().deleteOnExit(); + tempDir = baseTempDir.resolve("pid_" + pid); cleanupTask = CompletableFuture.runAsync(() -> cleanup(false)); @@ -180,28 +182,42 @@ private TempLocationManager() { * @return the temporary directory for the current process */ public Path getTempDir() { - return getTempDir(true); + return getTempDir(null); + } + + /** + * Get the temporary subdirectory for the current process. The directory will be removed at JVM + * exit. + * + * @param subPath the relative subdirectory path, may be {@literal null} + * @return the temporary subdirectory for the current process + */ + public Path getTempDir(Path subPath) { + return getTempDir(subPath, true); } /** - * Get the temporary directory for the current process. + * Get the temporary subdirectory for the current process. * + * @param subPath the relative subdirectory path, may be {@literal null} * @param create if true, create the directory if it does not exist * @return the temporary directory for the current process * @throws IllegalStateException if the directory could not be created */ - public Path getTempDir(boolean create) { - if (create && !Files.exists(tempDir)) { + public Path getTempDir(Path subPath, boolean create) { + Path rslt = + subPath != null && !subPath.toString().isEmpty() ? tempDir.resolve(subPath) : tempDir; + if (create && !Files.exists(rslt)) { try { Files.createDirectories( - tempDir, + rslt, PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); } catch (Exception e) { log.warn("Failed to create temp directory: {}", tempDir, e); throw new IllegalStateException("Failed to create temp directory: " + tempDir, e); } } - return tempDir; + return rslt; } void cleanup(boolean cleanSelf) { diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java index e0c35d97b4a..373d0541f7a 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java @@ -12,32 +12,37 @@ import java.util.Properties; import java.util.UUID; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; public class TempLocationManagerTest { - @Test - void testDefault() throws Exception { + @ParameterizedTest + @ValueSource(strings = {"", "test1"}) + void testDefault(String subPath) throws Exception { TempLocationManager tempLocationManager = TempLocationManager.getInstance(true); - Path tempDir = tempLocationManager.getTempDir(); + Path tempDir = tempLocationManager.getTempDir(Paths.get(subPath)); assertNotNull(tempDir); assertTrue(Files.exists(tempDir)); assertTrue(Files.isDirectory(tempDir)); assertTrue(Files.isWritable(tempDir)); assertTrue(Files.isReadable(tempDir)); assertTrue(Files.isExecutable(tempDir)); - assertTrue(tempDir.endsWith("pid_" + PidHelper.getPid())); + assertTrue(tempDir.toString().contains("pid_" + PidHelper.getPid())); } - @Test - void testFromConfig() throws Exception { + @ParameterizedTest + @ValueSource(strings = {"", "test1"}) + void testFromConfig(String subPath) throws Exception { Path myDir = Files.createTempDirectory( "ddprof-test-", PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); + myDir.toFile().deleteOnExit(); Properties props = new Properties(); props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); TempLocationManager tempLocationManager = new TempLocationManager(configProvider); - Path tempDir = tempLocationManager.getTempDir(); + Path tempDir = tempLocationManager.getTempDir(Paths.get(subPath)); assertNotNull(tempDir); assertTrue(tempDir.toString().startsWith(myDir.toString())); } @@ -58,6 +63,7 @@ void testFromConfigNotWritable() throws Exception { Files.createTempDirectory( "ddprof-test-", PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("r-x------"))); + myDir.toFile().deleteOnExit(); Properties props = new Properties(); props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); @@ -65,22 +71,34 @@ void testFromConfigNotWritable() throws Exception { assertThrows(IllegalStateException.class, tempLocationManager::getTempDir); } - @Test - void testCleanup() throws Exception { + @ParameterizedTest + @ValueSource(strings = {"", "test1"}) + void testCleanup(String subPath) throws Exception { Path myDir = Files.createTempDirectory( "ddprof-test-", PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); + myDir.toFile().deleteOnExit(); Properties props = new Properties(); props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); + props.put( + ProfilingConfig.PROFILING_UPLOAD_PERIOD, + "0"); // to force immediate cleanup; must be a string value! ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); TempLocationManager tempLocationManager = new TempLocationManager(configProvider); - Path tempDir = tempLocationManager.getTempDir(); + Path tempDir = tempLocationManager.getTempDir(Paths.get(subPath)); assertNotNull(tempDir); // fake temp location - Path fakeTempDir = tempDir.getParent().resolve("pid_00000"); + Path fakeTempDir = tempDir.getParent(); + while (fakeTempDir != null && !fakeTempDir.endsWith("ddprof")) { + fakeTempDir = fakeTempDir.getParent(); + } + fakeTempDir = fakeTempDir.resolve("pid_0000"); Files.createDirectories(fakeTempDir); + Path tmpFile = Files.createFile(fakeTempDir.resolve("test.txt")); + tmpFile.toFile().deleteOnExit(); // make sure this is deleted at exit + fakeTempDir.toFile().deleteOnExit(); // also this one tempLocationManager.cleanup(false); // fake temp location should be deleted // real temp location should be kept From 17ef73a2ee3816c1a06d5df82065a1286ff6e724 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 20 Nov 2024 12:43:58 +0100 Subject: [PATCH 11/16] Blindly trying to fix the smoke tests --- .../java/datadog/trace/util/PidHelper.java | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/internal-api/src/main/java/datadog/trace/util/PidHelper.java b/internal-api/src/main/java/datadog/trace/util/PidHelper.java index eacce63374b..19a5c38eba5 100644 --- a/internal-api/src/main/java/datadog/trace/util/PidHelper.java +++ b/internal-api/src/main/java/datadog/trace/util/PidHelper.java @@ -5,10 +5,12 @@ import datadog.trace.context.TraceScope; import de.thetaphi.forbiddenapis.SuppressForbidden; import java.io.BufferedReader; +import java.io.IOException; import java.io.InputStreamReader; import java.lang.management.ManagementFactory; import java.util.Collections; import java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; import org.slf4j.Logger; @@ -70,18 +72,29 @@ public static Set getJavaPids() { ProcessBuilder pb = new ProcessBuilder("jps"); try (TraceScope ignored = AgentTracer.get().muteTracing()) { Process p = pb.start(); + // start draining the subcommand's pipes asynchronously to avoid flooding them + CompletableFuture> collecting = + CompletableFuture.supplyAsync( + (Supplier>) + () -> { + try (BufferedReader br = + new BufferedReader(new InputStreamReader(p.getInputStream()))) { + return br.lines() + .filter(l -> !l.contains("jps")) + .map( + l -> { + int idx = l.indexOf(' '); + return l.substring(0, idx); + }) + .collect(java.util.stream.Collectors.toSet()); + } catch (IOException e) { + log.debug("Unable to list java processes via 'jps'", e); + return Collections.emptySet(); + } + }); if (p.waitFor(500, TimeUnit.MILLISECONDS)) { if (p.exitValue() == 0) { - try (BufferedReader br = new BufferedReader(new InputStreamReader(p.getInputStream()))) { - return br.lines() - .filter(l -> !l.contains("jps")) - .map( - l -> { - int idx = l.indexOf(' '); - return l.substring(0, idx); - }) - .collect(java.util.stream.Collectors.toSet()); - } + return collecting.get(); } else { log.debug("Execution of 'jps' failed with exit code {}", p.exitValue()); } From 0fa642e280856d23ca05ea97053d4b4073925df5 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 20 Nov 2024 12:44:50 +0100 Subject: [PATCH 12/16] Blindly trying to fix the smoke tests --- .../java/datadog/trace/util/PidHelper.java | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/internal-api/src/main/java/datadog/trace/util/PidHelper.java b/internal-api/src/main/java/datadog/trace/util/PidHelper.java index 19a5c38eba5..c5ca80f0222 100644 --- a/internal-api/src/main/java/datadog/trace/util/PidHelper.java +++ b/internal-api/src/main/java/datadog/trace/util/PidHelper.java @@ -75,23 +75,22 @@ public static Set getJavaPids() { // start draining the subcommand's pipes asynchronously to avoid flooding them CompletableFuture> collecting = CompletableFuture.supplyAsync( - (Supplier>) - () -> { - try (BufferedReader br = - new BufferedReader(new InputStreamReader(p.getInputStream()))) { - return br.lines() - .filter(l -> !l.contains("jps")) - .map( - l -> { - int idx = l.indexOf(' '); - return l.substring(0, idx); - }) - .collect(java.util.stream.Collectors.toSet()); - } catch (IOException e) { - log.debug("Unable to list java processes via 'jps'", e); - return Collections.emptySet(); - } - }); + () -> { + try (BufferedReader br = + new BufferedReader(new InputStreamReader(p.getInputStream()))) { + return br.lines() + .filter(l -> !l.contains("jps")) + .map( + l -> { + int idx = l.indexOf(' '); + return l.substring(0, idx); + }) + .collect(java.util.stream.Collectors.toSet()); + } catch (IOException e) { + log.debug("Unable to list java processes via 'jps'", e); + return Collections.emptySet(); + } + }); if (p.waitFor(500, TimeUnit.MILLISECONDS)) { if (p.exitValue() == 0) { return collecting.get(); @@ -99,6 +98,7 @@ public static Set getJavaPids() { log.debug("Execution of 'jps' failed with exit code {}", p.exitValue()); } } else { + p.destroyForcibly(); log.debug("Execution of 'jps' timed out"); } } catch (Exception e) { From 5e0ed99602af19fa56cc173e3ad12be997a9c9ba Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 20 Nov 2024 18:21:29 +0100 Subject: [PATCH 13/16] Allow for slower jps execution --- internal-api/src/main/java/datadog/trace/util/PidHelper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-api/src/main/java/datadog/trace/util/PidHelper.java b/internal-api/src/main/java/datadog/trace/util/PidHelper.java index c5ca80f0222..4121d9da68d 100644 --- a/internal-api/src/main/java/datadog/trace/util/PidHelper.java +++ b/internal-api/src/main/java/datadog/trace/util/PidHelper.java @@ -91,7 +91,7 @@ public static Set getJavaPids() { return Collections.emptySet(); } }); - if (p.waitFor(500, TimeUnit.MILLISECONDS)) { + if (p.waitFor(1200, TimeUnit.MILLISECONDS)) { if (p.exitValue() == 0) { return collecting.get(); } else { From ff6ff648fd742d3ad4b267ac3a2dbc84c330f56b Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 20 Nov 2024 18:21:47 +0100 Subject: [PATCH 14/16] Add tests for concurrent cleanup --- .../controller/TempLocationManager.java | 67 +++++++++++++-- .../controller/TempLocationManagerTest.java | 83 +++++++++++++++++++ 2 files changed, 144 insertions(+), 6 deletions(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java index d2204cf2e02..642c64bbb54 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java @@ -7,6 +7,7 @@ import java.nio.file.FileVisitResult; import java.nio.file.FileVisitor; import java.nio.file.Files; +import java.nio.file.NoSuchFileException; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.attribute.BasicFileAttributes; @@ -31,6 +32,33 @@ private static final class SingletonHolder { private static final TempLocationManager INSTANCE = new TempLocationManager(); } + interface CleanupHook extends FileVisitor { + CleanupHook EMPTY = new CleanupHook() {}; + + @Override + default FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) + throws IOException { + return null; + } + + @Override + default FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + return null; + } + + @Override + default FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { + return null; + } + + @Override + default FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + return null; + } + + default void onCleanupStart() {} + } + private class CleanupVisitor implements FileVisitor { private boolean shouldClean; @@ -47,6 +75,8 @@ private class CleanupVisitor implements FileVisitor { @Override public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException { + cleanupTestHook.preVisitDirectory(dir, attrs); + if (dir.equals(baseTempDir)) { return FileVisitResult.CONTINUE; } @@ -63,16 +93,23 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { - if (Files.getLastModifiedTime(file).toInstant().isAfter(cutoff)) { - return FileVisitResult.SKIP_SUBTREE; + cleanupTestHook.visitFile(file, attrs); + try { + if (Files.getLastModifiedTime(file).toInstant().isAfter(cutoff)) { + return FileVisitResult.SKIP_SUBTREE; + } + Files.delete(file); + } catch (NoSuchFileException ignored) { + // another process has already cleaned it; ignore } - Files.delete(file); return FileVisitResult.CONTINUE; } @Override public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { - if (log.isDebugEnabled()) { + cleanupTestHook.visitFileFailed(file, exc); + // do not log files/directories removed by another process running concurrently + if (!(exc instanceof NoSuchFileException) && log.isDebugEnabled()) { log.debug("Failed to delete file {}", file, exc); } return FileVisitResult.CONTINUE; @@ -80,8 +117,16 @@ public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOExce @Override public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + cleanupTestHook.postVisitDirectory(dir, exc); + if (exc instanceof NoSuchFileException) { + return FileVisitResult.CONTINUE; + } if (shouldClean) { - Files.delete(dir); + try { + Files.delete(dir); + } catch (NoSuchFileException ignored) { + // another process has already cleaned it; ignore + } String fileName = dir.getFileName().toString(); // reset the flag only if we are done cleaning the top-level directory shouldClean = !fileName.startsWith("pid_"); @@ -96,6 +141,8 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx private final CompletableFuture cleanupTask; + private final CleanupHook cleanupTestHook; + /** * Get the singleton instance of the TempLocationManager. It will run the cleanup task in the * background. @@ -125,6 +172,12 @@ private TempLocationManager() { } TempLocationManager(ConfigProvider configProvider) { + this(configProvider, CleanupHook.EMPTY); + } + + TempLocationManager(ConfigProvider configProvider, CleanupHook testHook) { + cleanupTestHook = testHook; + // In order to avoid racy attempts to clean up files which are currently being processed in a // JVM which is being shut down (the JVMs far in the shutdown routine may not be reported by // 'jps' but still can be eg. processing JFR chunks) we will not clean up any files not older @@ -222,6 +275,7 @@ public Path getTempDir(Path subPath, boolean create) { void cleanup(boolean cleanSelf) { try { + cleanupTestHook.onCleanupStart(); Files.walkFileTree(baseTempDir, new CleanupVisitor(cleanSelf)); } catch (IOException e) { if (log.isDebugEnabled()) { @@ -232,7 +286,8 @@ void cleanup(boolean cleanSelf) { } } - private void waitForCleanup() { + // accessible for tests + void waitForCleanup() { cleanupTask.join(); } } diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java index 373d0541f7a..a8f9803f142 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java @@ -5,12 +5,16 @@ import datadog.trace.api.config.ProfilingConfig; import datadog.trace.bootstrap.config.provider.ConfigProvider; import datadog.trace.util.PidHelper; +import java.io.IOException; +import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; import java.nio.file.attribute.PosixFilePermissions; import java.util.Properties; import java.util.UUID; +import java.util.concurrent.Phaser; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; @@ -105,4 +109,83 @@ void testCleanup(String subPath) throws Exception { assertFalse(Files.exists(fakeTempDir)); assertTrue(Files.exists(tempDir)); } + + @ParameterizedTest + @ValueSource(strings = {"preVisitDirectory", "visitFile", "postVisitDirectory"}) + void testConcurrentCleanup(String section) throws Exception { + /* This test simulates concurrent cleanup + It utilizes a special hook to create synchronization points in the filetree walking routine, + allowing to delete the files at various points of execution. + The test makes sure that the cleanup is not interrupted and the file and directory being deleted + stays deleted. + */ + Path baseDir = + Files.createTempDirectory( + "ddprof-test-", + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); + + Path fakeTempDir = baseDir.resolve("ddprof/pid_1234/scratch"); + Files.createDirectories(fakeTempDir); + Path fakeTempFile = fakeTempDir.resolve("libxxx.so"); + Files.createFile(fakeTempFile); + + fakeTempDir.toFile().deleteOnExit(); + fakeTempFile.toFile().deleteOnExit(); + + Phaser phaser = new Phaser(2); + + TempLocationManager.CleanupHook blocker = + new TempLocationManager.CleanupHook() { + @Override + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) + throws IOException { + if (section.equals("preVisitDirectory") && dir.equals(fakeTempDir)) { + phaser.arriveAndAwaitAdvance(); + phaser.arriveAndAwaitAdvance(); + } + return null; + } + + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) + throws IOException { + if (section.equals("visitFile") && file.equals(fakeTempFile)) { + phaser.arriveAndAwaitAdvance(); + phaser.arriveAndAwaitAdvance(); + } + return null; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + if (section.equals("postVisitDirectory") && dir.equals(fakeTempDir)) { + phaser.arriveAndAwaitAdvance(); + phaser.arriveAndAwaitAdvance(); + } + return null; + } + }; + + TempLocationManager mgr = instance(baseDir, blocker); + + // wait for the cleanup start + phaser.arriveAndAwaitAdvance(); + Files.deleteIfExists(fakeTempFile); + phaser.arriveAndAwaitAdvance(); + mgr.waitForCleanup(); + + assertFalse(Files.exists(fakeTempFile)); + assertFalse(Files.exists(fakeTempDir)); + } + + private TempLocationManager instance(Path baseDir, TempLocationManager.CleanupHook cleanupHook) + throws IOException { + Properties props = new Properties(); + props.put(ProfilingConfig.PROFILING_TEMP_DIR, baseDir.toString()); + props.put( + ProfilingConfig.PROFILING_UPLOAD_PERIOD, + "0"); // to force immediate cleanup; must be a string value! + + return new TempLocationManager(ConfigProvider.withPropertiesOverride(props), cleanupHook); + } } From f438c9e4a7fe015f8563ea2e687687e3f92efc2f Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Fri, 22 Nov 2024 13:50:54 +0100 Subject: [PATCH 15/16] Add support for timeout to cleanup task --- .../controller/TempLocationManager.java | 124 +++++++++++++++--- .../controller/TempLocationManagerTest.java | 69 +++++++++- 2 files changed, 173 insertions(+), 20 deletions(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java index 642c64bbb54..e1d940f595a 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java @@ -16,6 +16,9 @@ import java.time.temporal.ChronoUnit; import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,7 +59,7 @@ default FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOE return null; } - default void onCleanupStart() {} + default void onCleanupStart(boolean selfCleanup, long timeout, TimeUnit unit) {} } private class CleanupVisitor implements FileVisitor { @@ -66,15 +69,35 @@ private class CleanupVisitor implements FileVisitor { private final boolean cleanSelf; private final Instant cutoff; + private final Instant timeoutTarget; - CleanupVisitor(boolean cleanSelf) { + private boolean terminated = false; + + CleanupVisitor(boolean cleanSelf, long timeout, TimeUnit unit) { this.cleanSelf = cleanSelf; this.cutoff = Instant.now().minus(cutoffSeconds, ChronoUnit.SECONDS); + this.timeoutTarget = + timeout > -1 + ? Instant.now().plus(TimeUnit.MILLISECONDS.convert(timeout, unit), ChronoUnit.MILLIS) + : null; + } + + boolean isTerminated() { + return terminated; + } + + private boolean isTimedOut() { + return timeoutTarget != null && Instant.now().isAfter(timeoutTarget); } @Override public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException { + if (isTimedOut()) { + log.debug("Cleaning task timed out"); + terminated = true; + return FileVisitResult.TERMINATE; + } cleanupTestHook.preVisitDirectory(dir, attrs); if (dir.equals(baseTempDir)) { @@ -93,6 +116,11 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + if (isTimedOut()) { + log.debug("Cleaning task timed out"); + terminated = true; + return FileVisitResult.TERMINATE; + } cleanupTestHook.visitFile(file, attrs); try { if (Files.getLastModifiedTime(file).toInstant().isAfter(cutoff)) { @@ -107,6 +135,11 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO @Override public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { + if (isTimedOut()) { + log.debug("Cleaning task timed out"); + terminated = true; + return FileVisitResult.TERMINATE; + } cleanupTestHook.visitFileFailed(file, exc); // do not log files/directories removed by another process running concurrently if (!(exc instanceof NoSuchFileException) && log.isDebugEnabled()) { @@ -117,6 +150,11 @@ public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOExce @Override public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + if (isTimedOut()) { + log.debug("Cleaning task timed out"); + terminated = true; + return FileVisitResult.TERMINATE; + } cleanupTestHook.postVisitDirectory(dir, exc); if (exc instanceof NoSuchFileException) { return FileVisitResult.CONTINUE; @@ -162,7 +200,11 @@ public static TempLocationManager getInstance() { static TempLocationManager getInstance(boolean waitForCleanup) { TempLocationManager instance = SingletonHolder.INSTANCE; if (waitForCleanup) { - instance.waitForCleanup(); + try { + instance.waitForCleanup(5, TimeUnit.SECONDS); + } catch (TimeoutException ignored) { + + } } return instance; } @@ -216,17 +258,21 @@ private TempLocationManager() { tempDir = baseTempDir.resolve("pid_" + pid); cleanupTask = CompletableFuture.runAsync(() -> cleanup(false)); - Runtime.getRuntime() - .addShutdownHook( - new Thread( - () -> { - try { - cleanupTask.join(); - } finally { - cleanup(true); - } - }, - "Temp Location Manager Cleanup")); + Thread selfCleanup = + new Thread( + () -> { + try { + waitForCleanup(1, TimeUnit.SECONDS); + } catch (TimeoutException e) { + log.info( + "Cleanup task timed out. {} temp directory might not have been cleaned up properly", + tempDir); + } finally { + cleanup(true); + } + }, + "Temp Location Manager Cleanup"); + Runtime.getRuntime().addShutdownHook(selfCleanup); } /** @@ -273,10 +319,35 @@ public Path getTempDir(Path subPath, boolean create) { return rslt; } - void cleanup(boolean cleanSelf) { + /** + * Walk the base temp directory recursively and remove all inactive per-process entries. No + * timeout is applied. + * + * @param cleanSelf {@literal true} will call only this process' temp directory, {@literal false} + * only the other processes will be cleaned up + * @return {@literal true} if cleanup fully succeeded or {@literal false} otherwise (eg. + * interruption etc.) + */ + boolean cleanup(boolean cleanSelf) { + return cleanup(cleanSelf, -1, TimeUnit.SECONDS); + } + + /** + * Walk the base temp directory recursively and remove all inactive per-process entries + * + * @param cleanSelf {@literal true} will call only this process' temp directory, {@literal false} + * only the other processes will be cleaned up + * @param timeout the task timeout; may be {@literal -1} to signal no timeout + * @param unit the task timeout unit + * @return {@literal true} if cleanup fully succeeded or {@literal false} otherwise (timeout, + * interruption etc.) + */ + boolean cleanup(boolean cleanSelf, long timeout, TimeUnit unit) { try { - cleanupTestHook.onCleanupStart(); - Files.walkFileTree(baseTempDir, new CleanupVisitor(cleanSelf)); + cleanupTestHook.onCleanupStart(cleanSelf, timeout, unit); + CleanupVisitor visitor = new CleanupVisitor(cleanSelf, timeout, unit); + Files.walkFileTree(baseTempDir, visitor); + return !visitor.isTerminated(); } catch (IOException e) { if (log.isDebugEnabled()) { log.warn("Unable to cleanup temp location {}", baseTempDir, e); @@ -284,10 +355,25 @@ void cleanup(boolean cleanSelf) { log.warn("Unable to cleanup temp location {}", baseTempDir); } } + return false; } // accessible for tests - void waitForCleanup() { - cleanupTask.join(); + void waitForCleanup(long timeout, TimeUnit unit) throws TimeoutException { + try { + cleanupTask.get(timeout, unit); + } catch (InterruptedException e) { + cleanupTask.cancel(true); + Thread.currentThread().interrupt(); + } catch (TimeoutException e) { + cleanupTask.cancel(true); + throw e; + } catch (ExecutionException e) { + if (log.isDebugEnabled()) { + log.debug("Failed to cleanup temp directory: {}", tempDir, e); + } else { + log.debug("Failed to cleanup temp directory: {}", tempDir); + } + } } } diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java index a8f9803f142..fdc839ffaae 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java @@ -12,11 +12,18 @@ import java.nio.file.Paths; import java.nio.file.attribute.BasicFileAttributes; import java.nio.file.attribute.PosixFilePermissions; +import java.util.ArrayList; +import java.util.List; import java.util.Properties; import java.util.UUID; import java.util.concurrent.Phaser; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.LockSupport; +import java.util.stream.Stream; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; public class TempLocationManagerTest { @@ -172,12 +179,72 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx phaser.arriveAndAwaitAdvance(); Files.deleteIfExists(fakeTempFile); phaser.arriveAndAwaitAdvance(); - mgr.waitForCleanup(); + mgr.waitForCleanup(30, TimeUnit.SECONDS); assertFalse(Files.exists(fakeTempFile)); assertFalse(Files.exists(fakeTempDir)); } + @ParameterizedTest + @MethodSource("timeoutTestArguments") + void testCleanupWithTimeout(boolean selfCleanup, String section) throws Exception { + long timeoutMs = 500; + TempLocationManager.CleanupHook delayer = + new TempLocationManager.CleanupHook() { + @Override + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) + throws IOException { + if (section.equals("preVisitDirectory")) { + LockSupport.parkNanos(timeoutMs * 1_000_000); + } + return TempLocationManager.CleanupHook.super.preVisitDirectory(dir, attrs); + } + + @Override + public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { + if (section.equals("visitFileFailed")) { + LockSupport.parkNanos(timeoutMs * 1_000_000); + } + return TempLocationManager.CleanupHook.super.visitFileFailed(file, exc); + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + if (section.equals("postVisitDirectory")) { + LockSupport.parkNanos(timeoutMs * 1_000_000); + } + return TempLocationManager.CleanupHook.super.postVisitDirectory(dir, exc); + } + + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) + throws IOException { + if (section.equals("visitFile")) { + LockSupport.parkNanos(timeoutMs * 1_000_000); + } + return TempLocationManager.CleanupHook.super.visitFile(file, attrs); + } + }; + Path baseDir = + Files.createTempDirectory( + "ddprof-test-", + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); + TempLocationManager instance = instance(baseDir, delayer); + boolean rslt = instance.cleanup(selfCleanup, timeoutMs, TimeUnit.MILLISECONDS); + assertTrue(rslt); + } + + private static Stream timeoutTestArguments() { + List argumentsList = new ArrayList<>(); + for (boolean selfCleanup : new boolean[] {true, false}) { + for (String intercepted : + new String[] {"preVisitDirectory", "visitFile", "postVisitDirectory"}) { + argumentsList.add(Arguments.of(selfCleanup, intercepted)); + } + } + return argumentsList.stream(); + } + private TempLocationManager instance(Path baseDir, TempLocationManager.CleanupHook cleanupHook) throws IOException { Properties props = new Properties(); From 84d05011884db0bfb293330f6edc2d2efd1d4cd6 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Fri, 22 Nov 2024 13:53:32 +0100 Subject: [PATCH 16/16] Update dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java Co-authored-by: datadog-datadog-prod-us1[bot] <88084959+datadog-datadog-prod-us1[bot]@users.noreply.github.com> --- .../com/datadog/profiling/controller/TempLocationManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java index e1d940f595a..f82806f4ef0 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java @@ -71,7 +71,7 @@ private class CleanupVisitor implements FileVisitor { private final Instant cutoff; private final Instant timeoutTarget; - private boolean terminated = false; + private boolean terminated; CleanupVisitor(boolean cleanSelf, long timeout, TimeUnit unit) { this.cleanSelf = cleanSelf;