Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Report profiler initialization and configuration errors to telemetry #8171

Merged
merged 2 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,9 @@ public void publish() {
datadogProfiler.recordSetting(
SSI_MECHANISM, profilerActivationSetting.ssiMechanism.name().toLowerCase());
}

/**
 * Identifies this settings implementation as the {@code "datadog"} profiler kind.
 *
 * @return the constant string {@code "datadog"}
 */
@Override
protected String profilerKind() {
return "datadog";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,9 @@ public void publish() {
.commit();
}
}

/**
 * Identifies this settings implementation as the {@code "jfr"} profiler kind.
 *
 * @return the constant string {@code "jfr"}
 */
@Override
protected String profilerKind() {
return "jfr";
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.datadog.profiling.controller;

import static datadog.trace.api.telemetry.LogCollector.SEND_TELEMETRY;

import datadog.trace.api.Config;
import datadog.trace.api.Platform;
import datadog.trace.api.config.ProfilingConfig;
Expand All @@ -16,9 +18,13 @@
import java.nio.file.Paths;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** Capture the profiler config first and allow emitting the setting events per each recording. */
public abstract class ProfilerSettingsSupport {
private static final Logger logger = LoggerFactory.getLogger(ProfilerSettingsSupport.class);

protected static final class ProfilerActivationSetting {
public enum Ssi {
INJECTED_AGENT,
Expand Down Expand Up @@ -177,6 +183,10 @@ protected ProfilerSettingsSupport(
// usually set via DD_INSTRUMENTATION_INSTALL_TYPE env var
configProvider.getString("instrumentation.install.type");
this.profilerActivationSetting = getProfilerActivation(configProvider);

logger.debug(
SEND_TELEMETRY,
"Profiler settings: " + this); // telemetry receiver does not recognize formatting
}

private static String getServiceInjection(ConfigProvider configProvider) {
Expand Down Expand Up @@ -231,6 +241,8 @@ private static String getDefaultAuxiliaryProfiler() {
/**
 * Publishes the captured settings. To be defined in a controller-specific way, e.g. an
 * implementation could emit JFR events.
 */
public abstract void publish();

/**
 * Returns a short identifier of the concrete profiler implementation. The value is rendered as
 * the {@code kind} entry by {@link #toString()}.
 *
 * <p>NOTE(review): {@code toString()} appears to be invoked from this class's constructor (for
 * the settings debug log), so implementations should presumably not depend on subclass fields -
 * confirm.
 *
 * @return the profiler kind identifier
 */
protected abstract String profilerKind();

private static String readPerfEventsParanoidSetting() {
String value = "unknown";
if (Platform.isLinux()) {
Expand All @@ -244,4 +256,35 @@ private static String readPerfEventsParanoidSetting() {
}
return value;
}

/**
 * Renders the captured profiler settings as a single {@code {key=value, ...}} line, starting with
 * the profiler kind. Some values are wrapped in single quotes.
 *
 * @return the textual representation of all settings held by this instance
 */
@Override
public String toString() {
// spotless:off
final StringBuilder settings = new StringBuilder("{");
settings.append("kind='").append(profilerKind()).append('\'');
settings.append(", uploadPeriod=").append(uploadPeriod);
settings.append(", uploadTimeout=").append(uploadTimeout);
settings.append(", uploadCompression='").append(uploadCompression).append('\'');
settings.append(", allocationProfilingEnabled=").append(allocationProfilingEnabled);
settings.append(", heapProfilingEnabled=").append(heapProfilingEnabled);
settings.append(", startForceFirst=").append(startForceFirst);
settings.append(", templateOverride='").append(templateOverride).append('\'');
settings.append(", exceptionSampleLimit=").append(exceptionSampleLimit);
settings.append(", exceptionHistogramTopItems=").append(exceptionHistogramTopItems);
settings.append(", exceptionHistogramMaxSize=").append(exceptionHistogramMaxSize);
settings.append(", hotspotsEnabled=").append(hotspotsEnabled);
settings.append(", endpointsEnabled=").append(endpointsEnabled);
settings.append(", auxiliaryProfiler='").append(auxiliaryProfiler).append('\'');
settings.append(", perfEventsParanoid='").append(perfEventsParanoid).append('\'');
settings.append(", hasNativeStacks=").append(hasNativeStacks);
settings.append(", seLinuxStatus='").append(seLinuxStatus).append('\'');
settings.append(", serviceInstrumentationType='").append(serviceInstrumentationType).append('\'');
settings.append(", serviceInjection='").append(serviceInjection).append('\'');
settings.append(", ddprofUnavailableReason='").append(ddprofUnavailableReason).append('\'');
settings.append(", profilerActivationSetting=").append(profilerActivationSetting);
settings.append(", stackDepth=").append(stackDepth);
settings.append(", hasJfrStackDepthApplied=").append(hasJfrStackDepthApplied);
settings.append('}');
// spotless:on
return settings.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
*/
package com.datadog.profiling.controller;

import static datadog.trace.api.telemetry.LogCollector.SEND_TELEMETRY;
import static datadog.trace.util.AgentThreadFactory.AgentThread.PROFILER_RECORDING_SCHEDULER;

import datadog.trace.api.Platform;
import datadog.trace.api.profiling.ProfilingListenersRegistry;
import datadog.trace.api.profiling.ProfilingSnapshot;
import datadog.trace.api.profiling.RecordingData;
Expand Down Expand Up @@ -160,7 +162,15 @@ private void startProfilingRecording() {
started = true;
} catch (UnsupportedEnvironmentException unsupported) {
log.warn(
"Datadog Profiling was enabled on an unsupported JVM, will not profile application. See {} for more details about supported JVMs.",
SEND_TELEMETRY,
"Datadog Profiling was enabled on an unsupported JVM, will not profile application. "
+ "(OS: {}, JVM: lang={}, runtime={}, vendor={}) See {} for more details about supported JVMs.",
Platform.isLinux()
? "Linux"
: Platform.isWindows() ? "Windows" : Platform.isMac() ? "MacOS" : "Other",
Platform.getLangVersion(),
Platform.getRuntimeVersion(),
Platform.getRuntimeVendor(),
"https://docs.datadoghq.com/profiler/enabling/java/?tab=commandarguments#requirements");
} catch (Throwable t) {
if (t instanceof RuntimeException) {
Expand All @@ -171,6 +181,7 @@ private void startProfilingRecording() {
if (msg != null && msg.contains("com.oracle.jrockit:type=FlightRecorder")) {
// Yes, the commercial JFR is not enabled
log.warn(
SEND_TELEMETRY,
"You're running Oracle JDK 8. Datadog Continuous Profiler for Java depends on Java Flight Recorder, which requires a paid license in Oracle JDK 8. If you have one, please add the following `java` command line args: ‘-XX:+UnlockCommercialFeatures -XX:+FlightRecorder’. Alternatively, you can use a different Java 8 distribution like OpenJDK, where Java Flight Recorder is free.");
// Do not log the underlying exception
t = null;
Expand All @@ -183,7 +194,7 @@ private void startProfilingRecording() {
if (t instanceof IllegalStateException && "Shutdown in progress".equals(t.getMessage())) {
log.debug("Shutdown in progress, cannot start profiling");
} else {
log.error("Fatal exception during profiling startup", t);
log.error(SEND_TELEMETRY, "Fatal exception during profiling startup", t);
throw t instanceof RuntimeException ? (RuntimeException) t : new RuntimeException(t);
}
}
Expand Down Expand Up @@ -267,7 +278,7 @@ public void snapshot(boolean onShutdown) {
lastSnapshot = Instant.now();
}
} catch (final Exception e) {
log.error("Exception in profiling thread, continuing", e);
log.error(SEND_TELEMETRY, "Exception in profiling thread, continuing", e);
} catch (final Throwable t) {
/*
Try to continue even after fatal exception. It seems to be useful to attempt to store profile when this happens.
Expand All @@ -276,7 +287,7 @@ public void snapshot(boolean onShutdown) {
Another reason is that it may be bad to stop profiling if the rest of the app is continuing.
*/
try {
log.error("Fatal exception in profiling thread, trying to continue", t);
log.error(SEND_TELEMETRY, "Fatal exception in profiling thread, trying to continue", t);
} catch (final Throwable t2) {
// This should almost never happen and there is not much we can do here in cases like
// OutOfMemoryError, so we will just ignore this.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.datadog.profiling.controller;

import static datadog.trace.api.telemetry.LogCollector.SEND_TELEMETRY;

import datadog.trace.api.config.ProfilingConfig;
import datadog.trace.bootstrap.config.provider.ConfigProvider;
import datadog.trace.util.PidHelper;
Expand Down Expand Up @@ -239,6 +241,7 @@ private TempLocationManager() {
ProfilingConfig.PROFILING_TEMP_DIR, ProfilingConfig.PROFILING_TEMP_DIR_DEFAULT));
if (!Files.exists(configuredTempDir)) {
log.warn(
SEND_TELEMETRY,
"Base temp directory, as defined in '"
+ ProfilingConfig.PROFILING_TEMP_DIR
+ "' does not exist: "
Expand Down Expand Up @@ -312,7 +315,7 @@ public Path getTempDir(Path subPath, boolean create) {
rslt,
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
} catch (Exception e) {
log.warn("Failed to create temp directory: {}", tempDir, e);
log.warn(SEND_TELEMETRY, "Failed to create temp directory: {}", tempDir, e);
throw new IllegalStateException("Failed to create temp directory: " + tempDir, e);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static datadog.trace.api.config.ProfilingConfig.PROFILING_START_FORCE_FIRST;
import static datadog.trace.api.config.ProfilingConfig.PROFILING_START_FORCE_FIRST_DEFAULT;
import static datadog.trace.api.telemetry.LogCollector.SEND_TELEMETRY;
import static datadog.trace.util.AgentThreadFactory.AGENT_THREAD_GROUP;

import com.datadog.profiling.controller.ConfigurationException;
Expand Down Expand Up @@ -166,10 +167,10 @@ public static synchronized void run(
}
} catch (final UnsupportedEnvironmentException e) {
log.warn(e.getMessage());
log.debug("", e);
log.debug(SEND_TELEMETRY, "Unsupported environment for Datadog profiler", e);
} catch (final ConfigurationException e) {
log.warn("Failed to initialize profiling agent! {}", e.getMessage());
log.debug("Failed to initialize profiling agent!", e);
log.debug(SEND_TELEMETRY, "Failed to initialize profiling agent!", e);
}
}
}
Expand Down
Loading