diff --git a/src/BenchmarkDotNet/Attributes/MemoryDiagnoserAttribute.cs b/src/BenchmarkDotNet/Attributes/MemoryDiagnoserAttribute.cs index 52e2151441..6d488c217b 100644 --- a/src/BenchmarkDotNet/Attributes/MemoryDiagnoserAttribute.cs +++ b/src/BenchmarkDotNet/Attributes/MemoryDiagnoserAttribute.cs @@ -10,9 +10,10 @@ public class MemoryDiagnoserAttribute : Attribute, IConfigSource public IConfig Config { get; } /// Display Garbage Collections per Generation columns (Gen 0, Gen 1, Gen 2). True by default. - public MemoryDiagnoserAttribute(bool displayGenColumns = true) + /// If true, monitoring will be enabled and survived memory will be measured on the first benchmark run. + public MemoryDiagnoserAttribute(bool displayGenColumns = true, bool includeSurvived = false) { - Config = ManualConfig.CreateEmpty().AddDiagnoser(new MemoryDiagnoser(new MemoryDiagnoserConfig(displayGenColumns))); + Config = ManualConfig.CreateEmpty().AddDiagnoser(new MemoryDiagnoser(new MemoryDiagnoserConfig(displayGenColumns, includeSurvived))); } } } \ No newline at end of file diff --git a/src/BenchmarkDotNet/Code/CodeGenerator.cs b/src/BenchmarkDotNet/Code/CodeGenerator.cs index 005564b77c..91cf3e0572 100644 --- a/src/BenchmarkDotNet/Code/CodeGenerator.cs +++ b/src/BenchmarkDotNet/Code/CodeGenerator.cs @@ -63,6 +63,7 @@ internal static string Generate(BuildPartition buildPartition) .Replace("$PassArguments$", passArguments) .Replace("$EngineFactoryType$", GetEngineFactoryTypeName(benchmark)) .Replace("$MeasureExtraStats$", buildInfo.Config.HasExtraStatsDiagnoser() ? "true" : "false") + .Replace("$MeasureSurvivedMemory$", buildInfo.Config.HasSurvivedMemoryDiagnoser() ? "true" : "false") .Replace("$DisassemblerEntryMethodName$", DisassemblerConstants.DisassemblerEntryMethodName) .Replace("$WorkloadMethodCall$", provider.GetWorkloadMethodCall(passArguments)) .RemoveRedundantIfDefines(compilationId); diff --git a/src/BenchmarkDotNet/Configs/ImmutableConfig.cs b/src/BenchmarkDotNet/Configs/ImmutableConfig.cs index b6e03126fd..0ffa20c858 100644 --- a/src/BenchmarkDotNet/Configs/ImmutableConfig.cs +++ b/src/BenchmarkDotNet/Configs/ImmutableConfig.cs @@ -111,6 +111,8 @@ internal ImmutableConfig( public bool HasMemoryDiagnoser() => diagnosers.OfType().Any(); + public bool HasSurvivedMemoryDiagnoser() => diagnosers.Any(diagnoser => diagnoser is MemoryDiagnoser md && md.Config.IncludeSurvived); + public bool HasThreadingDiagnoser() => diagnosers.Contains(ThreadingDiagnoser.Default); public bool HasExceptionDiagnoser() => diagnosers.Contains(ExceptionDiagnoser.Default); diff --git a/src/BenchmarkDotNet/ConsoleArguments/CommandLineOptions.cs b/src/BenchmarkDotNet/ConsoleArguments/CommandLineOptions.cs index e3aed1fedd..3c4e1f2e26 100644 --- a/src/BenchmarkDotNet/ConsoleArguments/CommandLineOptions.cs +++ b/src/BenchmarkDotNet/ConsoleArguments/CommandLineOptions.cs @@ -62,6 +62,9 @@ public bool UseDisassemblyDiagnoser [Option('a', "artifacts", Required = false, HelpText = "Valid path to accessible directory")] public DirectoryInfo ArtifactsDirectory { get; set; } + [Option("memorySurvived", Required = false, Default = false, HelpText = "Measures survived memory.")] + public bool UseSurvivedMemoryDiagnoser { get; set; } + [Option("outliers", Required = false, Default = OutlierMode.RemoveUpper, HelpText = "DontRemove/RemoveUpper/RemoveLower/RemoveAll")] public OutlierMode Outliers { get; set; } diff --git a/src/BenchmarkDotNet/ConsoleArguments/ConfigParser.cs b/src/BenchmarkDotNet/ConsoleArguments/ConfigParser.cs index 355f1a6b75..da5963677b 100644 --- a/src/BenchmarkDotNet/ConsoleArguments/ConfigParser.cs +++ b/src/BenchmarkDotNet/ConsoleArguments/ConfigParser.cs @@ -343,8 +343,11 @@ private static IConfig CreateConfig(CommandLineOptions options, IConfig globalCo .Select(counterName => (HardwareCounter)Enum.Parse(typeof(HardwareCounter), counterName, ignoreCase: true)) .ToArray()); - if (options.UseMemoryDiagnoser) + if (options.UseSurvivedMemoryDiagnoser) + config.AddDiagnoser(new MemoryDiagnoser(new MemoryDiagnoserConfig(includeSurvived: true))); + else if (options.UseMemoryDiagnoser) config.AddDiagnoser(MemoryDiagnoser.Default); + if (options.UseThreadingDiagnoser) config.AddDiagnoser(ThreadingDiagnoser.Default); if (options.UseExceptionDiagnoser) diff --git a/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoser.cs b/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoser.cs index e9c9fd7ae5..debc5c52d8 100644 --- a/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoser.cs +++ b/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoser.cs @@ -8,6 +8,7 @@ using BenchmarkDotNet.Reports; using BenchmarkDotNet.Running; using BenchmarkDotNet.Validators; +using Perfolizer.Metrology; namespace BenchmarkDotNet.Diagnosers { @@ -42,6 +43,26 @@ public IEnumerable ProcessResults(DiagnoserResults diagnoserResults) } yield return new Metric(AllocatedMemoryMetricDescriptor.Instance, diagnoserResults.GcStats.GetBytesAllocatedPerOperation(diagnoserResults.BenchmarkCase) ?? double.NaN); + + if (Config.IncludeSurvived) + { + yield return new Metric(SurvivedMemoryMetricDescriptor.Instance, diagnoserResults.GcStats.SurvivedBytes ?? double.NaN); + } + } + + private class SurvivedMemoryMetricDescriptor : IMetricDescriptor + { + internal static readonly IMetricDescriptor Instance = new SurvivedMemoryMetricDescriptor(); + + public string Id => "Survived Memory"; + public string DisplayName => "Survived"; + public string Legend => "Memory survived after the first operation (managed only, inclusive, 1KB = 1024B)"; + public string NumberFormat => "N0"; + public UnitType UnitType => UnitType.Size; + public string Unit => SizeUnit.B.Abbreviation; + public bool TheGreaterTheBetter => false; + public int PriorityInCategory { get; } = AllocatedMemoryMetricDescriptor.Instance.PriorityInCategory + 1; + public bool GetIsAvailable(Metric metric) => true; } private class GarbageCollectionsMetricDescriptor : IMetricDescriptor diff --git a/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoserConfig.cs b/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoserConfig.cs index cb5eb7221e..e7fd17a645 100644 --- a/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoserConfig.cs +++ b/src/BenchmarkDotNet/Diagnosers/MemoryDiagnoserConfig.cs @@ -5,12 +5,15 @@ namespace BenchmarkDotNet.Diagnosers public class MemoryDiagnoserConfig { /// Display Garbage Collections per Generation columns (Gen 0, Gen 1, Gen 2). True by default. + /// If true, monitoring will be enabled and survived memory will be measured on the first benchmark run. [PublicAPI] - public MemoryDiagnoserConfig(bool displayGenColumns = true) + public MemoryDiagnoserConfig(bool displayGenColumns = true, bool includeSurvived = false) { DisplayGenColumns = displayGenColumns; + IncludeSurvived = includeSurvived; } public bool DisplayGenColumns { get; } + public bool IncludeSurvived { get; } } } \ No newline at end of file diff --git a/src/BenchmarkDotNet/Engines/Engine.cs b/src/BenchmarkDotNet/Engines/Engine.cs index 29d2e0ddd7..f535be4095 100644 --- a/src/BenchmarkDotNet/Engines/Engine.cs +++ b/src/BenchmarkDotNet/Engines/Engine.cs @@ -19,6 +19,7 @@ public class Engine : IEngine [PublicAPI] public IHost Host { get; } [PublicAPI] public Action WorkloadAction { get; } + [PublicAPI] public Action WorkloadActionNoUnroll { get; } [PublicAPI] public Action Dummy1Action { get; } [PublicAPI] public Action Dummy2Action { get; } [PublicAPI] public Action Dummy3Action { get; } @@ -44,19 +45,22 @@ public class Engine : IEngine private readonly EnginePilotStage pilotStage; private readonly EngineWarmupStage warmupStage; private readonly EngineActualStage actualStage; - private readonly bool includeExtraStats; private readonly Random random; + private readonly bool includeExtraStats, includeSurvivedMemory; + + private long? survivedBytes; + private bool survivedBytesMeasured; internal Engine( IHost host, IResolver resolver, - Action dummy1Action, Action dummy2Action, Action dummy3Action, Action overheadAction, Action workloadAction, Job targetJob, + Action dummy1Action, Action dummy2Action, Action dummy3Action, Action overheadAction, Action workloadAction, Action workloadActionNoUnroll, Job targetJob, Action globalSetupAction, Action globalCleanupAction, Action iterationSetupAction, Action iterationCleanupAction, long operationsPerInvoke, - bool includeExtraStats, string benchmarkName) + bool includeExtraStats, bool includeSurvivedMemory, string benchmarkName) { - Host = host; OverheadAction = overheadAction; + WorkloadActionNoUnroll = workloadActionNoUnroll; Dummy1Action = dummy1Action; Dummy2Action = dummy2Action; Dummy3Action = dummy3Action; @@ -69,6 +73,7 @@ internal Engine( OperationsPerInvoke = operationsPerInvoke; this.includeExtraStats = includeExtraStats; BenchmarkName = benchmarkName; + this.includeSurvivedMemory = includeSurvivedMemory; Resolver = resolver; @@ -86,6 +91,14 @@ internal Engine( random = new Random(12345); // we are using constant seed to try to get repeatable results } + internal Engine WithInitialData(Engine other) + { + // Copy the survived bytes from the other engine so we only measure it once. + survivedBytes = other.survivedBytes; + survivedBytesMeasured = other.survivedBytesMeasured; + return this; + } + public void Dispose() { try @@ -168,6 +181,17 @@ public Measurement RunIteration(IterationData data) Span stackMemory = randomizeMemory ? stackalloc byte[random.Next(32)] : Span.Empty; + bool needsSurvivedMeasurement = includeSurvivedMemory && !isOverhead && !survivedBytesMeasured; + if (needsSurvivedMeasurement && GcStats.InitTotalBytes()) + { + // Measure survived bytes for only the first invocation. + survivedBytesMeasured = true; + long beforeBytes = GcStats.GetTotalBytes(); + WorkloadActionNoUnroll(1); + long afterBytes = GcStats.GetTotalBytes(); + survivedBytes = afterBytes - beforeBytes; + } + // Measure var clock = Clock.Start(); action(invokeCount / unrollFactor); @@ -218,8 +242,8 @@ public Measurement RunIteration(IterationData data) IterationCleanupAction(); // we run iteration cleanup after collecting GC stats var totalOperationsCount = data.InvokeCount * OperationsPerInvoke; - GcStats gcStats = (finalGcStats - initialGcStats).WithTotalOperations(totalOperationsCount); - ThreadingStats threadingStats = (finalThreadingStats - initialThreadingStats).WithTotalOperations(data.InvokeCount * OperationsPerInvoke); + GcStats gcStats = (finalGcStats - initialGcStats).WithTotalOperationsAndSurvivedBytes(totalOperationsCount, survivedBytes); + ThreadingStats threadingStats = (finalThreadingStats - initialThreadingStats).WithTotalOperations(totalOperationsCount); return (gcStats, threadingStats, exceptionsStats.ExceptionsCount / (double)totalOperationsCount); } @@ -253,7 +277,7 @@ private void GcCollect() ForceGcCollect(); } - private static void ForceGcCollect() + internal static void ForceGcCollect() { GC.Collect(); GC.WaitForPendingFinalizers(); diff --git a/src/BenchmarkDotNet/Engines/EngineFactory.cs b/src/BenchmarkDotNet/Engines/EngineFactory.cs index 0588218522..633d722544 100644 --- a/src/BenchmarkDotNet/Engines/EngineFactory.cs +++ b/src/BenchmarkDotNet/Engines/EngineFactory.cs @@ -69,10 +69,12 @@ public IEngine CreateReadyToRun(EngineParameters engineParameters) .WithMinInvokeCount(2) // the minimum is 2 (not the default 4 which can be too much and not 1 which we already know is not enough) .WithEvaluateOverhead(false); // it's something very time consuming, it overhead is too small compared to total time - return CreateEngine(engineParameters, needsPilot, engineParameters.OverheadActionNoUnroll, engineParameters.WorkloadActionNoUnroll); + return CreateEngine(engineParameters, needsPilot, engineParameters.OverheadActionNoUnroll, engineParameters.WorkloadActionNoUnroll) + .WithInitialData(singleActionEngine); } - var multiActionEngine = CreateMultiActionEngine(engineParameters); + var multiActionEngine = CreateMultiActionEngine(engineParameters) + .WithInitialData(singleActionEngine); DeadCodeEliminationHelper.KeepAliveWithoutBoxing(Jit(multiActionEngine, ++jitIndex, invokeCount: defaultUnrollFactor, unrollFactor: defaultUnrollFactor)); @@ -118,6 +120,7 @@ private static Engine CreateEngine(EngineParameters engineParameters, Job job, A engineParameters.Dummy3Action, idle, main, + engineParameters.WorkloadActionNoUnroll, job, engineParameters.GlobalSetupAction, engineParameters.GlobalCleanupAction, @@ -125,6 +128,7 @@ private static Engine CreateEngine(EngineParameters engineParameters, Job job, A engineParameters.IterationCleanupAction, engineParameters.OperationsPerInvoke, engineParameters.MeasureExtraStats, + engineParameters.MeasureSurvivedMemory, engineParameters.BenchmarkName); } } diff --git a/src/BenchmarkDotNet/Engines/EngineParameters.cs b/src/BenchmarkDotNet/Engines/EngineParameters.cs index ec61582529..337a5a2acd 100644 --- a/src/BenchmarkDotNet/Engines/EngineParameters.cs +++ b/src/BenchmarkDotNet/Engines/EngineParameters.cs @@ -27,6 +27,8 @@ public class EngineParameters public Action IterationCleanupAction { get; set; } public bool MeasureExtraStats { get; set; } + public bool MeasureSurvivedMemory { get; set; } + [PublicAPI] public string BenchmarkName { get; set; } public bool NeedsJitting => TargetJob.ResolveValue(RunMode.RunStrategyCharacteristic, DefaultResolver).NeedsJitting(); diff --git a/src/BenchmarkDotNet/Engines/GcStats.cs b/src/BenchmarkDotNet/Engines/GcStats.cs index ca60b0deea..287e984168 100644 --- a/src/BenchmarkDotNet/Engines/GcStats.cs +++ b/src/BenchmarkDotNet/Engines/GcStats.cs @@ -8,7 +8,7 @@ namespace BenchmarkDotNet.Engines { - public struct GcStats : IEquatable + public readonly struct GcStats : IEquatable { internal const string ResultsLinePrefix = "// GC: "; @@ -16,13 +16,14 @@ public struct GcStats : IEquatable public static readonly GcStats Empty = default; - private GcStats(int gen0Collections, int gen1Collections, int gen2Collections, long? allocatedBytes, long totalOperations) + private GcStats(int gen0Collections, int gen1Collections, int gen2Collections, long? allocatedBytes, long totalOperations, long? survivedBytes) { Gen0Collections = gen0Collections; Gen1Collections = gen1Collections; Gen2Collections = gen2Collections; AllocatedBytes = allocatedBytes; TotalOperations = totalOperations; + SurvivedBytes = survivedBytes; } // did not use array here just to avoid heap allocation @@ -36,6 +37,7 @@ private GcStats(int gen0Collections, int gen1Collections, int gen2Collections, l private long? AllocatedBytes { get; } public long TotalOperations { get; } + public long? SurvivedBytes { get; } public long? GetBytesAllocatedPerOperation(BenchmarkCase benchmarkCase) { @@ -56,38 +58,34 @@ private GcStats(int gen0Collections, int gen1Collections, int gen2Collections, l left.Gen1Collections + right.Gen1Collections, left.Gen2Collections + right.Gen2Collections, left.AllocatedBytes + right.AllocatedBytes, - left.TotalOperations + right.TotalOperations); + left.TotalOperations + right.TotalOperations, + left.SurvivedBytes + right.SurvivedBytes); } public static GcStats operator -(GcStats left, GcStats right) - { - return new GcStats( + => new ( Math.Max(0, left.Gen0Collections - right.Gen0Collections), Math.Max(0, left.Gen1Collections - right.Gen1Collections), Math.Max(0, left.Gen2Collections - right.Gen2Collections), ClampToPositive(left.AllocatedBytes - right.AllocatedBytes), - Math.Max(0, left.TotalOperations - right.TotalOperations)); - } + Math.Max(0, left.TotalOperations - right.TotalOperations), + ClampToPositive(left.SurvivedBytes - right.SurvivedBytes) + ); private static long? ClampToPositive(long? num) { return num.HasValue ? Math.Max(0, num.Value) : null; } - public GcStats WithTotalOperations(long totalOperationsCount) - => this + new GcStats(0, 0, 0, 0, totalOperationsCount); + public GcStats WithTotalOperationsAndSurvivedBytes(long totalOperationsCount, long? survivedBytes) + => this + new GcStats(0, 0, 0, 0, totalOperationsCount, survivedBytes); - public int GetCollectionsCount(int generation) + public int GetCollectionsCount(int generation) => generation switch { - switch (generation) { - case 0: - return Gen0Collections; - case 1: - return Gen1Collections; - default: - return Gen2Collections; - } - } + 0 => Gen0Collections, + 1 => Gen1Collections, + _ => Gen2Collections, + }; /// /// returns total allocated bytes (not per operation) @@ -116,6 +114,7 @@ public static GcStats ReadInitial() GC.CollectionCount(1), GC.CollectionCount(2), allocatedBytes, + 0, 0); } @@ -129,12 +128,13 @@ public static GcStats ReadFinal() // this will force GC.Collect, so we want to do this after collecting collections counts // to exclude this single full forced collection from results GetAllocatedBytes(), + 0, 0); } [PublicAPI] public static GcStats FromForced(int forcedFullGarbageCollections) - => new GcStats(forcedFullGarbageCollections, forcedFullGarbageCollections, forcedFullGarbageCollections, 0, 0); + => new (forcedFullGarbageCollections, forcedFullGarbageCollections, forcedFullGarbageCollections, 0, 0, 0); private static long? GetAllocatedBytes() { @@ -164,24 +164,25 @@ public static GcStats FromForced(int forcedFullGarbageCollections) } public string ToOutputLine() - => $"{ResultsLinePrefix} {Gen0Collections} {Gen1Collections} {Gen2Collections} {AllocatedBytes?.ToString() ?? MetricColumn.UnknownRepresentation} {TotalOperations}"; + => $"{ResultsLinePrefix} {Gen0Collections} {Gen1Collections} {Gen2Collections} {AllocatedBytes?.ToString() ?? MetricColumn.UnknownRepresentation} {TotalOperations} {SurvivedBytes?.ToString() ?? MetricColumn.UnknownRepresentation}"; public static GcStats Parse(string line) { if (!line.StartsWith(ResultsLinePrefix)) throw new NotSupportedException($"Line must start with {ResultsLinePrefix}"); - var measurementSplit = line.Remove(0, ResultsLinePrefix.Length).Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries); + var measurementSplit = line.Remove(0, ResultsLinePrefix.Length).Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries); if (!int.TryParse(measurementSplit[0], out int gen0) || !int.TryParse(measurementSplit[1], out int gen1) || !int.TryParse(measurementSplit[2], out int gen2) || !TryParse(measurementSplit[3], out long? allocatedBytes) - || !long.TryParse(measurementSplit[4], out long totalOperationsCount)) + || !long.TryParse(measurementSplit[4], out long totalOperationsCount) + || !TryParse(measurementSplit[5], out long? survivedBytes)) { throw new NotSupportedException("Invalid string"); } - return new GcStats(gen0, gen1, gen2, allocatedBytes, totalOperationsCount); + return new GcStats(gen0, gen1, gen2, allocatedBytes, totalOperationsCount, survivedBytes); } private static bool TryParse(string s, out long? result) @@ -218,9 +219,7 @@ private static long CalculateAllocationQuantumSize() break; } - GC.Collect(); - GC.WaitForPendingFinalizers(); - GC.Collect(); + Engine.ForceGcCollect(); result = GC.GetTotalMemory(false); var tmp = new object(); @@ -231,11 +230,17 @@ private static long CalculateAllocationQuantumSize() return result; } - public bool Equals(GcStats other) => Gen0Collections == other.Gen0Collections && Gen1Collections == other.Gen1Collections && Gen2Collections == other.Gen2Collections && AllocatedBytes == other.AllocatedBytes && TotalOperations == other.TotalOperations; + public bool Equals(GcStats other) => + Gen0Collections == other.Gen0Collections + && Gen1Collections == other.Gen1Collections + && Gen2Collections == other.Gen2Collections + && AllocatedBytes == other.AllocatedBytes + && TotalOperations == other.TotalOperations + && SurvivedBytes == other.SurvivedBytes; public override bool Equals(object obj) => obj is GcStats other && Equals(other); - public override int GetHashCode() => HashCode.Combine(Gen0Collections, Gen1Collections, Gen2Collections, AllocatedBytes, TotalOperations); + public override int GetHashCode() => HashCode.Combine(Gen0Collections, Gen1Collections, Gen2Collections, AllocatedBytes, TotalOperations, SurvivedBytes); #if !NET6_0_OR_GREATER // Separate class to have the cctor run lazily, to avoid enabling monitoring before the benchmarks are ran. @@ -312,5 +317,49 @@ private static bool CheckMonitoringTotalAllocatedMemorySize() } } #endif + + internal static bool InitTotalBytes() + { + // Don't try to measure in Mono, Monitoring is not available, and GC.GetTotalMemory is very inaccurate. + if (RuntimeInformation.IsMono) + return false; + if (GetTotalBytesDelegate == null) + { + GetTotalBytesDelegate = CreateGetTotalBytesFunc(); + // Measure bytes to allow GC monitor to make its initial allocations. + DeadCodeEliminationHelper.KeepAliveWithoutBoxing(GetTotalBytes()); + } + return true; + } + + internal static long GetTotalBytes() + => GetTotalBytesDelegate.Invoke(); + + private static Func GetTotalBytesDelegate; + + private static Func CreateGetTotalBytesFunc() + { + try + { + AppDomain.MonitoringIsEnabled = true; + // verify the api works + if (AppDomain.MonitoringIsEnabled && AppDomain.CurrentDomain.MonitoringSurvivedMemorySize >= 0) + return () => + { + // Enforce GC.Collect here to make sure we get accurate results. + Engine.ForceGcCollect(); + return AppDomain.CurrentDomain.MonitoringSurvivedMemorySize; + }; + } + catch { } + + // Fallback to GC.GetTotalMemory if MonitoringSurvivedMemorySize doesn't work. + return () => + { + // Enforce GC.Collect here to make sure we get accurate results. + Engine.ForceGcCollect(); + return GC.GetTotalMemory(true); + }; + } } } diff --git a/src/BenchmarkDotNet/Exporters/Csv/CsvMeasurementsExporter.cs b/src/BenchmarkDotNet/Exporters/Csv/CsvMeasurementsExporter.cs index 0d07c9058c..9eea4d7f4f 100644 --- a/src/BenchmarkDotNet/Exporters/Csv/CsvMeasurementsExporter.cs +++ b/src/BenchmarkDotNet/Exporters/Csv/CsvMeasurementsExporter.cs @@ -72,6 +72,10 @@ private static MeasurementColumn[] GetColumns(Summary summary) new MeasurementColumn("Gen_2", (_, report, __) => report.GcStats.Gen2Collections.ToString(summary.GetCultureInfo())), new MeasurementColumn("Allocated_Bytes", (_, report, __) => report.GcStats.GetBytesAllocatedPerOperation(report.BenchmarkCase)?.ToString(summary.GetCultureInfo()) ?? MetricColumn.UnknownRepresentation) }; + if (summary.BenchmarksCases.Any(benchmark => benchmark.Config.HasSurvivedMemoryDiagnoser())) + { + columns.Add(new MeasurementColumn("Survived_Bytes", (_, report, __) => report.GcStats.SurvivedBytes?.ToString(summary.GetCultureInfo()) ?? MetricColumn.UnknownRepresentation)); + } return columns.ToArray(); } diff --git a/src/BenchmarkDotNet/Templates/BenchmarkType.txt b/src/BenchmarkDotNet/Templates/BenchmarkType.txt index f17737d646..46ca87eb31 100644 --- a/src/BenchmarkDotNet/Templates/BenchmarkType.txt +++ b/src/BenchmarkDotNet/Templates/BenchmarkType.txt @@ -38,6 +38,7 @@ TargetJob = job, OperationsPerInvoke = $OperationsPerInvoke$, MeasureExtraStats = $MeasureExtraStats$, + MeasureSurvivedMemory = $MeasureSurvivedMemory$, BenchmarkName = benchmarkName }; diff --git a/src/BenchmarkDotNet/Toolchains/InProcess/Emit/Implementation/Runnable/RunnableReuse.cs b/src/BenchmarkDotNet/Toolchains/InProcess/Emit/Implementation/Runnable/RunnableReuse.cs index 7067650ec1..30802d242e 100644 --- a/src/BenchmarkDotNet/Toolchains/InProcess/Emit/Implementation/Runnable/RunnableReuse.cs +++ b/src/BenchmarkDotNet/Toolchains/InProcess/Emit/Implementation/Runnable/RunnableReuse.cs @@ -102,6 +102,7 @@ private static EngineParameters CreateEngineParameters( TargetJob = benchmarkCase.Job, OperationsPerInvoke = benchmarkCase.Descriptor.OperationsPerInvoke, MeasureExtraStats = benchmarkCase.Config.HasExtraStatsDiagnoser(), + MeasureSurvivedMemory = benchmarkCase.Config.HasSurvivedMemoryDiagnoser(), BenchmarkName = FullNameProvider.GetBenchmarkName(benchmarkCase) }; return engineParameters; diff --git a/src/BenchmarkDotNet/Toolchains/InProcess/NoEmit/InProcessNoEmitRunner.cs b/src/BenchmarkDotNet/Toolchains/InProcess/NoEmit/InProcessNoEmitRunner.cs index a52e7ebcf8..e909ab771e 100644 --- a/src/BenchmarkDotNet/Toolchains/InProcess/NoEmit/InProcessNoEmitRunner.cs +++ b/src/BenchmarkDotNet/Toolchains/InProcess/NoEmit/InProcessNoEmitRunner.cs @@ -143,6 +143,7 @@ public static void RunCore(IHost host, BenchmarkCase benchmarkCase) TargetJob = job, OperationsPerInvoke = target.OperationsPerInvoke, MeasureExtraStats = benchmarkCase.Config.HasExtraStatsDiagnoser(), + MeasureSurvivedMemory = benchmarkCase.Config.HasSurvivedMemoryDiagnoser(), BenchmarkName = FullNameProvider.GetBenchmarkName(benchmarkCase) }; diff --git a/tests/BenchmarkDotNet.IntegrationTests/MemoryDiagnoserTests.cs b/tests/BenchmarkDotNet.IntegrationTests/MemoryDiagnoserTests.cs index fc6bc14362..2d0c1e98de 100755 --- a/tests/BenchmarkDotNet.IntegrationTests/MemoryDiagnoserTests.cs +++ b/tests/BenchmarkDotNet.IntegrationTests/MemoryDiagnoserTests.cs @@ -9,6 +9,7 @@ using BenchmarkDotNet.Columns; using BenchmarkDotNet.Configs; using BenchmarkDotNet.Diagnosers; +using BenchmarkDotNet.Engines; using BenchmarkDotNet.Extensions; using BenchmarkDotNet.IntegrationTests.Xunit; using BenchmarkDotNet.Jobs; @@ -66,6 +67,76 @@ public void MemoryDiagnoserIsAccurate(IToolchain toolchain) }); } + public class AccurateSurvived + { + [Benchmark] public void Empty() { } + [Benchmark] public byte[] EightBytesArray() => new byte[8]; + [Benchmark] public byte[] SixtyFourBytesArray() => new byte[64]; + [Benchmark] public Task AllocateTask() => Task.FromResult(-12345); + + + public byte[] bytes8; + public byte[] bytes64; + public Task task; + + [GlobalSetup(Targets = new string[] { nameof(EightBytesArrayNoAllocate), nameof(SixtyFourBytesArrayNoAllocate), nameof(TaskNoAllocate) })] + public void SetupNoAllocate() + { + bytes8 = new byte[8]; + bytes64 = new byte[64]; + task = Task.FromResult(-12345); + } + + [Benchmark] public byte[] EightBytesArrayNoAllocate() => bytes8; + [Benchmark] public byte[] SixtyFourBytesArrayNoAllocate() => bytes64; + [Benchmark] public Task TaskNoAllocate() => task; + + + [Benchmark] public void EightBytesArraySurvive() => bytes8 = new byte[8]; + [Benchmark] public void SixtyFourBytesArraySurvive() => bytes64 = new byte[64]; + [Benchmark] public void AllocateTaskSurvive() => task = Task.FromResult(-12345); + + + [Benchmark] public void EightBytesArrayAllocateNoSurvive() => DeadCodeEliminationHelper.KeepAliveWithoutBoxing(new byte[8]); + [Benchmark] public void SixtyFourBytesArrayAllocateNoSurvive() => DeadCodeEliminationHelper.KeepAliveWithoutBoxing(new byte[64]); + [Benchmark] public void TaskAllocateNoSurvive() => DeadCodeEliminationHelper.KeepAliveWithoutBoxing(Task.FromResult(-12345)); + } + + [Theory, MemberData(nameof(GetToolchains))] + [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] + public void MemoryDiagnoserSurvivedIsAccurate(IToolchain toolchain) + { + if (RuntimeInformation.IsNetCore && toolchain.IsInProcess) + { + // Survived memory is inaccurate with IL emit. + return; + } + + long objectAllocationOverhead = IntPtr.Size * 2; // pointer to method table + object header word + long arraySizeOverhead = IntPtr.Size; // array length + + AssertSurvived(toolchain, typeof(AccurateSurvived), new Dictionary + { + { nameof(AccurateSurvived.Empty), 0 }, + + { nameof(AccurateSurvived.EightBytesArray), 0 }, + { nameof(AccurateSurvived.SixtyFourBytesArray), 0 }, + { nameof(AccurateSurvived.AllocateTask), 0 }, + + { nameof(AccurateSurvived.EightBytesArrayNoAllocate), 0 }, + { nameof(AccurateSurvived.SixtyFourBytesArrayNoAllocate), 0 }, + { nameof(AccurateSurvived.TaskNoAllocate), 0 }, + + { nameof(AccurateSurvived.EightBytesArraySurvive), 8 + objectAllocationOverhead + arraySizeOverhead }, + { nameof(AccurateSurvived.SixtyFourBytesArraySurvive), 64 + objectAllocationOverhead + arraySizeOverhead }, + { nameof(AccurateSurvived.AllocateTaskSurvive), CalculateRequiredSpace>() }, + + { nameof(AccurateSurvived.EightBytesArrayAllocateNoSurvive), 0 }, + { nameof(AccurateSurvived.SixtyFourBytesArrayAllocateNoSurvive), 0 }, + { nameof(AccurateSurvived.TaskAllocateNoSurvive), 0 }, + }); + } + [FactEnvSpecific("We don't want to test NativeAOT twice (for .NET Framework 4.6.2 and .NET 7.0)", EnvRequirement.DotNetCoreOnly)] public void MemoryDiagnoserSupportsNativeAOT() { @@ -114,6 +185,16 @@ public void MemoryDiagnoserDoesNotIncludeAllocationsFromSetupAndCleanup(IToolcha }); } + [Theory, MemberData(nameof(GetToolchains))] + [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] + public void MemoryDiagnoserDoesNotIncludeSurvivedFromSetupAndCleanup(IToolchain toolchain) + { + AssertSurvived(toolchain, typeof(AllocatingGlobalSetupAndCleanup), new Dictionary + { + { nameof(AllocatingGlobalSetupAndCleanup.AllocateNothing), 0 } + }); + } + public class NoAllocationsAtAll { [Benchmark] public void EmptyMethod() { } @@ -134,6 +215,16 @@ public void EngineShouldNotInterfereAllocationResults(IToolchain toolchain) }); } + [Theory, MemberData(nameof(GetToolchains))] + [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] + public void EngineShouldNotInterfereSurvivedResults(IToolchain toolchain) + { + AssertSurvived(toolchain, typeof(NoAllocationsAtAll), new Dictionary + { + { nameof(NoAllocationsAtAll.EmptyMethod), 0 } + }); + } + public class NoBoxing { [Benchmark] public ValueTuple ReturnsValueType() => new ValueTuple(0); @@ -149,9 +240,28 @@ public void EngineShouldNotIntroduceBoxing(IToolchain toolchain) }); } + [Theory, MemberData(nameof(GetToolchains))] + [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] + public void EngineShouldNotIntroduceBoxingSurvived(IToolchain toolchain) + { + AssertSurvived(toolchain, typeof(NoBoxing), new Dictionary + { + { nameof(NoBoxing.ReturnsValueType), 0 } + }); + } + public class NonAllocatingAsynchronousBenchmarks { - private readonly Task completedTaskOfT = Task.FromResult(default(int)); // we store it in the field, because Task is reference type so creating it allocates heap memory + private readonly Task completedTaskOfT = Task.FromResult(-12345); // we store it in the field, because Task is reference type so creating it allocates heap memory + + [GlobalSetup] + public void Setup() + { + // Run once to set static memory. + DeadCodeEliminationHelper.KeepAliveWithoutBoxing(CompletedTask()); + DeadCodeEliminationHelper.KeepAliveWithoutBoxing(CompletedTaskOfT()); + DeadCodeEliminationHelper.KeepAliveWithoutBoxing(CompletedValueTaskOfT()); + } [Benchmark] public Task CompletedTask() => Task.CompletedTask; @@ -177,6 +287,18 @@ public void AwaitingTasksShouldNotInterfereAllocationResults(IToolchain toolchai }); } + [Theory, MemberData(nameof(GetToolchains))] + [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] + public void AwaitingTasksShouldNotInterfereSurvivedResults(IToolchain toolchain) + { + AssertSurvived(toolchain, typeof(NonAllocatingAsynchronousBenchmarks), new Dictionary + { + { nameof(NonAllocatingAsynchronousBenchmarks.CompletedTask), 0 }, + { nameof(NonAllocatingAsynchronousBenchmarks.CompletedTaskOfT), 0 }, + { nameof(NonAllocatingAsynchronousBenchmarks.CompletedValueTaskOfT), 0 } + }); + } + public class WithOperationsPerInvokeBenchmarks { [Benchmark(OperationsPerInvoke = 4)] @@ -276,7 +398,7 @@ public void MemoryDiagnoserIsAccurateForMultiThreadedBenchmarks(IToolchain toolc private void AssertAllocations(IToolchain toolchain, Type benchmarkType, Dictionary benchmarksAllocationsValidators) { - var config = CreateConfig(toolchain); + var config = CreateConfig(toolchain, Job.ShortRun, MemoryDiagnoser.Default); var benchmarks = BenchmarkConverter.TypeToBenchmarks(benchmarkType, config); var summary = BenchmarkRunner.Run(benchmarks); @@ -312,9 +434,29 @@ private void AssertAllocations(IToolchain toolchain, Type benchmarkType, Diction } } - private IConfig CreateConfig(IToolchain toolchain) + private void AssertSurvived(IToolchain toolchain, Type benchmarkType, Dictionary benchmarkSurvivedValidators) + { + var config = CreateConfig(toolchain, Job.Dry, new MemoryDiagnoser(new MemoryDiagnoserConfig(includeSurvived: true))); + var benchmarks = BenchmarkConverter.TypeToBenchmarks(benchmarkType, config); + + var summary = BenchmarkRunner.Run(benchmarks); + + foreach (var benchmarkSurvivedValidator in benchmarkSurvivedValidators) + { + var survivedBenchmarks = benchmarks.BenchmarksCases.Where(benchmark => benchmark.Descriptor.WorkloadMethodDisplayInfo == benchmarkSurvivedValidator.Key); + + foreach (var benchmark in survivedBenchmarks) + { + var benchmarkReport = summary.Reports.Single(report => report.BenchmarkCase == benchmark); + + Assert.Equal(benchmarkSurvivedValidator.Value, benchmarkReport.GcStats.SurvivedBytes); + } + } + } + + private IConfig CreateConfig(IToolchain toolchain, Job baseJob, MemoryDiagnoser memoryDiagnoser) => ManualConfig.CreateEmpty() - .AddJob(Job.ShortRun + .AddJob(baseJob .WithEvaluateOverhead(false) // no need to run idle for this test .WithWarmupCount(0) // don't run warmup to save some time for our CI runs .WithIterationCount(1) // single iteration is enough for us @@ -324,7 +466,7 @@ private IConfig CreateConfig(IToolchain toolchain) .WithEnvironmentVariable("COMPlus_TieredCompilation", "0") // Tiered JIT can allocate some memory on a background thread, let's disable it to make our tests less flaky (#1542) .WithToolchain(toolchain)) .AddColumnProvider(DefaultColumnProviders.Instance) - .AddDiagnoser(MemoryDiagnoser.Default) + .AddDiagnoser(memoryDiagnoser) .AddLogger(toolchain.IsInProcess ? ConsoleLogger.Default : new OutputLogger(output)); // we can't use OutputLogger for the InProcess toolchains because it allocates memory on the same thread // note: don't copy, never use in production systems (it should work but I am not 100% sure)