Skip to content

Commit

Permalink
[SDTEST-106] Early Flake Detection (#145)
Browse files Browse the repository at this point in the history
* implemented EFD. Better cache management
* added efd tags to the session
* added efd tests. fixed ATR bug
* added exporter abstraction
  • Loading branch information
ypopovych authored Oct 23, 2024
1 parent a150a52 commit 68e1a33
Show file tree
Hide file tree
Showing 24 changed files with 1,101 additions and 217 deletions.
16 changes: 16 additions & 0 deletions DatadogSDKTesting.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

/* Begin PBXBuildFile section */
A71119A22BA8A4F400118323 /* DatadogSDKTesting.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = A7FC25642BA1E83500067E26 /* DatadogSDKTesting.framework */; };
A718EC2D2CBD5DCE00C5F0D9 /* EarlyFlakeDetection.swift in Sources */ = {isa = PBXBuildFile; fileRef = A718EC2C2CBD5DCE00C5F0D9 /* EarlyFlakeDetection.swift */; };
A718EC2F2CBD73F300C5F0D9 /* CacheManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = A718EC2E2CBD73F300C5F0D9 /* CacheManager.swift */; };
A718EC312CBE754300C5F0D9 /* EarlyFlakeDetectionService.swift in Sources */ = {isa = PBXBuildFile; fileRef = A718EC302CBE754300C5F0D9 /* EarlyFlakeDetectionService.swift */; };
A718EC352CC7F32A00C5F0D9 /* EarlyFlakeDetectionTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = A718EC342CC7F32A00C5F0D9 /* EarlyFlakeDetectionTests.swift */; };
A71D89262BD6CACD00531A90 /* Logger.swift in Sources */ = {isa = PBXBuildFile; fileRef = A71D89252BD6CACD00531A90 /* Logger.swift */; };
A71E64692BAA0AA100F2ACA5 /* Config.swift in Sources */ = {isa = PBXBuildFile; fileRef = A71E64512BAA0AA100F2ACA5 /* Config.swift */; };
A71E646A2BAA0AA100F2ACA5 /* Environment.swift in Sources */ = {isa = PBXBuildFile; fileRef = A71E64532BAA0AA100F2ACA5 /* Environment.swift */; };
Expand Down Expand Up @@ -301,6 +305,10 @@
8194A2242940B33100B4B592 /* DDTestSession.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DDTestSession.swift; sourceTree = "<group>"; };
81AA244A2978589600DE8F8A /* DDFileReader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DDFileReader.swift; sourceTree = "<group>"; };
81EDADB2292698A200279027 /* LLVMTotalsCoverageFormat.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LLVMTotalsCoverageFormat.swift; sourceTree = "<group>"; };
A718EC2C2CBD5DCE00C5F0D9 /* EarlyFlakeDetection.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EarlyFlakeDetection.swift; sourceTree = "<group>"; };
A718EC2E2CBD73F300C5F0D9 /* CacheManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CacheManager.swift; sourceTree = "<group>"; };
A718EC302CBE754300C5F0D9 /* EarlyFlakeDetectionService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EarlyFlakeDetectionService.swift; sourceTree = "<group>"; };
A718EC342CC7F32A00C5F0D9 /* EarlyFlakeDetectionTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EarlyFlakeDetectionTests.swift; sourceTree = "<group>"; };
A71D89252BD6CACD00531A90 /* Logger.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Logger.swift; sourceTree = "<group>"; };
A71E64512BAA0AA100F2ACA5 /* Config.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Config.swift; sourceTree = "<group>"; };
A71E64532BAA0AA100F2ACA5 /* Environment.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Environment.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -711,6 +719,7 @@
E1A63F5B26789915002F48FA /* CodeOwners.swift */,
E1550C44274F7FF700006F11 /* Clock.swift */,
81AA244A2978589600DE8F8A /* DDFileReader.swift */,
A718EC2E2CBD73F300C5F0D9 /* CacheManager.swift */,
);
path = Utils;
sourceTree = "<group>";
Expand Down Expand Up @@ -842,6 +851,7 @@
E143CF1127D7790300F4018A /* Persistence */,
E143CF1627D7790300F4018A /* Files */,
A76174C02CB685250098CE99 /* SettingsService.swift */,
A718EC302CBE754300C5F0D9 /* EarlyFlakeDetectionService.swift */,
E143CF0C27D7790300F4018A /* ExporterConfiguration.swift */,
E143CF1927D7790300F4018A /* EventsExporter.swift */,
);
Expand Down Expand Up @@ -986,6 +996,7 @@
E116D7D925248C280022779D /* Utils */,
A71E64512BAA0AA100F2ACA5 /* Config.swift */,
A7B70E3B2C5CDE8B00E4AE09 /* Endpoint.swift */,
A718EC2C2CBD5DCE00C5F0D9 /* EarlyFlakeDetection.swift */,
E158E2DB25471DA000DBCD6C /* DDInstrumentationControl.swift */,
8194A2242940B33100B4B592 /* DDTestSession.swift */,
E10034D92703599D00439C9C /* DDTestModule.swift */,
Expand Down Expand Up @@ -1061,6 +1072,7 @@
E1BB54AB25401E47007C2D93 /* OutputCapture */,
E1AD0741252E1EBB003705C1 /* Utils */,
A7FC26902BA204B900067E26 /* DatadogSDKTesting.xctestplan */,
A718EC342CC7F32A00C5F0D9 /* EarlyFlakeDetectionTests.swift */,
E1AD0761252F2F7E003705C1 /* DDTestObserverTests.swift */,
E1AD0756252F01F0003705C1 /* EnvironmentTests.swift */,
A71E647F2BAB442300F2ACA5 /* ConfigTests.swift */,
Expand Down Expand Up @@ -1494,6 +1506,7 @@
A7FC26132BA1EC0500067E26 /* CodeOwners.swift in Sources */,
A7FC25B72BA1EADF00067E26 /* DDTestSession.swift in Sources */,
A71E64702BAA0AA100F2ACA5 /* TravisCI.swift in Sources */,
A718EC2F2CBD73F300C5F0D9 /* CacheManager.swift in Sources */,
A7FC25BC2BA1EADF00067E26 /* FrameworkLoadHandler.swift in Sources */,
A7FC26122BA1EC0500067E26 /* SwiftExtensions.swift in Sources */,
A7FC260C2BA1EC0500067E26 /* GitInfo.swift in Sources */,
Expand All @@ -1518,6 +1531,7 @@
A7FC25DF2BA1EBF300067E26 /* StdoutCapture.swift in Sources */,
A71E64782BAA0AA100F2ACA5 /* AzureCI.swift in Sources */,
A7FC25D32BA1EB3400067E26 /* DDSymbolicator.swift in Sources */,
A718EC2D2CBD5DCE00C5F0D9 /* EarlyFlakeDetection.swift in Sources */,
A7FC25C12BA1EAF500067E26 /* GitUploader.swift in Sources */,
A71E646B2BAA0AA100F2ACA5 /* Span+Environment.swift in Sources */,
A76174BF2CB45F9B0098CE99 /* XCTestExtensions.swift in Sources */,
Expand Down Expand Up @@ -1556,6 +1570,7 @@
A7FC266B2BA1F70200067E26 /* EnvironmentTests.swift in Sources */,
A7FC266E2BA1F70200067E26 /* DDTestObserverTests.swift in Sources */,
A7FC266C2BA1F70200067E26 /* FrameworkLoadHandlerTests.swift in Sources */,
A718EC352CC7F32A00C5F0D9 /* EarlyFlakeDetectionTests.swift in Sources */,
A7FC265F2BA1F66E00067E26 /* DDTestSessionApiTests.m in Sources */,
A7FC26602BA1F67800067E26 /* SimpleSpanSerializerTests.swift in Sources */,
A7FC26672BA1F6FC00067E26 /* CodeOwnersTests.swift in Sources */,
Expand Down Expand Up @@ -1605,6 +1620,7 @@
A7FC26332BA1ECC500067E26 /* SpanEncoder.swift in Sources */,
A7FC26492BA1ECF800067E26 /* Directory.swift in Sources */,
A7FC26202BA1EC6200067E26 /* SearchCommitFormat.swift in Sources */,
A718EC312CBE754300C5F0D9 /* EarlyFlakeDetectionService.swift in Sources */,
A7FC261F2BA1EC6200067E26 /* SkipTestsFormat.swift in Sources */,
A76174C22CB685390098CE99 /* SettingsService.swift in Sources */,
A7FC26192BA1EC5A00067E26 /* DataUploadDelay.swift in Sources */,
Expand Down
6 changes: 6 additions & 0 deletions Sources/DatadogSDKTesting/Config.swift
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ final class Config {
var excludedBranches: Set<String> = []
var codeCoveragePriority: CodeCoveragePriority = .utility

/// Early Flake Detection (EFD)
var efdEnabled: Bool = true

/// Auto Test Retries
var testRetriesEnabled: Bool = true
var testRetriesTestRetryCount: UInt = 5
Expand Down Expand Up @@ -130,6 +133,9 @@ final class Config {
let coveragePerTestOnly = env[.ciVisibilityCodeCoverageOnlyPerTest] ?? false
coverageMode = coverageEnabled ? (coveragePerTestOnly ? .perTest : .total) : .disabled

/// Early Flake Detection (EFD)
efdEnabled = env[.enableCiVisibilityEFD] ?? efdEnabled

/// Automatic Test Retries
testRetriesEnabled = env[.enableCiVisibilityFlakyRetries] ?? testRetriesEnabled
testRetriesTestRetryCount = env[.ciVisibilityFlakyRetryCount] ?? testRetriesTestRetryCount
Expand Down
23 changes: 14 additions & 9 deletions Sources/DatadogSDKTesting/Coverage/DDCoverageHelper.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,28 @@ import Foundation

typealias cFunc = @convention(c) () -> Void

class DDCoverageHelper {
final class DDCoverageHelper {
var llvmProfileURL: URL
var storagePath: Directory
var initialCoverageSaved: Bool
let isTotal: Bool
let debug: Bool
let coverageWorkQueue: OperationQueue

init?(storagePath: Directory, total: Bool, priority: CodeCoveragePriority) {
init?(storagePath: Directory, total: Bool, priority: CodeCoveragePriority, debug: Bool) {
guard let profilePath = Self.profileGetFileName(), BinaryImages.profileImages.count > 0 else {
Log.print("Coverage not properly enabled in project, check documentation")
Log.debug("LLVM_PROFILE_FILE: \(Self.profileGetFileName() ?? "NIL")")
Log.debug("Profile Images count: \(BinaryImages.profileImages.count)")
return nil
}

guard let path = try? storagePath.createSubdirectory(path: "coverage") else {
Log.debug("Can't create subdirectory in: \(storagePath)")
return nil
}

llvmProfileURL = URL(fileURLWithPath: profilePath)
isTotal = total
self.storagePath = path
self.debug = debug
self.storagePath = storagePath
Log.debug("LLVM Coverage location: \(llvmProfileURL.path)")
Log.debug("DDCoverageHelper location: \(path.url.path)")
Log.debug("DDCoverageHelper location: \(storagePath.url.path)")
initialCoverageSaved = false
coverageWorkQueue = OperationQueue()
coverageWorkQueue.qualityOfService = priority.qos
Expand Down Expand Up @@ -103,6 +100,14 @@ class DDCoverageHelper {
}
}

/// On deallocation, either keeps the storage directory for inspection
/// (debug mode, its path is logged) or deletes it on a best-effort basis.
deinit {
    if debug {
        Log.debug("DDCoverageHelper storage path: \(storagePath.url.path)")
    } else {
        // Deletion errors are intentionally ignored.
        try? storagePath.delete()
    }
}

/// Deletes the helper's storage directory, ignoring any error thrown by the deletion.
func removeStoragePath() {
    _ = try? storagePath.delete()
}
Expand Down
9 changes: 4 additions & 5 deletions Sources/DatadogSDKTesting/Crashes/DDCrashes.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,13 @@ internal enum DDCrashes {
private static var crashCustomData = [String: Data]()
fileprivate static var sanitizerURL: URL!

static func install(disableMach: Bool) {
static func install(folder: Directory, disableMach: Bool) {
if sharedPLCrashReporter == nil {
installPLCrashReporterHandler(disableMach: disableMach)
installPLCrashReporterHandler(folder: folder, disableMach: disableMach)
}
}

private static func installPLCrashReporterHandler(disableMach: Bool) {
let crashDir = try? Directory(withSubdirectoryPath: "com.datadog.civisibility/crash")
private static func installPLCrashReporterHandler(folder: Directory, disableMach: Bool) {
let signalHandler: PLCrashReporterSignalHandlerType
#if os(macOS) || os(iOS)
signalHandler = disableMach ? .BSD : .mach
Expand All @@ -39,7 +38,7 @@ internal enum DDCrashes {
#endif
let config = PLCrashReporterConfig(signalHandlerType: signalHandler,
symbolicationStrategy: [],
basePath: crashDir?.url.path)
basePath: folder.url.path)
guard let plCrashReporter = PLCrashReporter(configuration: config) else {
return
}
Expand Down
8 changes: 7 additions & 1 deletion Sources/DatadogSDKTesting/DDTags.swift
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ internal enum DDTagValues {
static let statusPass = "pass"
static let statusFail = "fail"
static let statusSkip = "skip"

static let efdAbortSlow = "slow"
static let efdAbortFaulty = "faulty"
}

internal enum DDItrTags {
Expand All @@ -204,7 +207,10 @@ internal enum DDItrTags {
}

internal enum DDEfdTags {
static let isRetry = "test.is_retry"
static let testIsNew = "test.is_new"
static let testIsRetry = "test.is_retry"
static let testEfdEnabled = "test.early_flake.enabled"
static let testEfdAbortReason = "test.early_flake.abort_reason"
}

internal enum DDCFMessageID {
Expand Down
4 changes: 2 additions & 2 deletions Sources/DatadogSDKTesting/DDTest.swift
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public class DDTest: NSObject {
self.itr = itr
self.isUITest = false

currentTestExecutionOrder = module.currentExecutionOrder.checkedAdd(1)!
currentTestExecutionOrder = module.incrementTestRuns()

let attributes: [String: String] = [
DDGenericTags.type: DDTagValues.typeTest,
Expand Down Expand Up @@ -251,7 +251,7 @@ extension DDTest {
span.setAttribute(key: DDTestTags.testStatus, value: DDTagValues.statusSkip)
if itr.skipped {
span.setAttribute(key: DDTestTags.testSkippedByITR, value: "true")
module.itrSkipped.update { $0 += 1 }
module.incrementSkipped()
}
span.status = .ok
}
Expand Down
92 changes: 87 additions & 5 deletions Sources/DatadogSDKTesting/DDTestModule.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,19 @@ public class DDTestModule: NSObject, Encodable {
var status: DDTestStatus
var localization: String
var configError = false
var currentExecutionOrder: Synced<UInt> = Synced(0)
var itrSkipped: Synced<UInt> = Synced(0)
var atrRetried: Synced<UInt> = Synced(0)

/// Backing storage for all per-module counters, wrapped in `Synced`.
/// NOTE(review): `Synced` is presumably a lock-protected box — confirm against its definition.
private var _counters: Synced<Counters> = Synced(.init())

/// Current value of the `allTestRuns` counter.
var testRunsCount: UInt { _counters.value.allTestRuns }
/// Current value of the `skippedTests` counter.
var skippedByITRTestsCount: UInt { _counters.value.skippedTests }
/// Current value of the `retryTestRuns` counter.
var retriedByATRRunsCount: UInt { _counters.value.retryTestRuns }

/// The `newTests` and `knownTests` counters, read together in a single `use` call.
/// NOTE(review): presumably read in one call so both values come from the same snapshot — confirm `Synced.use` semantics.
var efdTestsCounts: (newTests: UInt, knownTests: UInt) {
_counters.use { ($0.newTests, $0.knownTests) }
}

/// Set to `true` by `checkEfdStatus` once the faulty-session threshold is exceeded;
/// when set, the module end event is tagged with the "faulty" EFD abort reason.
var efdSessionFailed: Bool = false

var linesCovered: Double? = nil

init(bundleName: String, startTime: Date?) {
Expand Down Expand Up @@ -78,7 +88,9 @@ public class DDTestModule: NSObject, Encodable {

if !DDTestMonitor.config.disableCrashHandler {
Log.measure(name: "DDCrashesInstall") {
DDCrashes.install(disableMach: DDTestMonitor.config.disableMachCrashHandler)
DDCrashes.install(
folder: try! DDTestMonitor.cacheManager!.session(feature: "crash"),
disableMach: DDTestMonitor.config.disableMachCrashHandler)
}
}
let moduleStartTime = startTime ?? beforeLoadingTime
Expand All @@ -97,6 +109,44 @@ public class DDTestModule: NSObject, Encodable {
Log.debug("Module loading time interval: \(DDTestMonitor.clock.now.timeIntervalSince(beforeLoadingTime))")
}

/// Adds one to the counter of tests skipped by ITR.
///
/// - Returns: The counter value as it was before this increment.
@discardableResult
func incrementSkipped() -> UInt {
    _counters.update { counters in
        let previous = counters.skippedTests
        counters.skippedTests += 1
        return previous
    }
}

/// Adds one to the counter of all test runs in this module.
///
/// - Returns: The counter value as it was before this increment.
func incrementTestRuns() -> UInt {
    _counters.update { counters in
        let previous = counters.allTestRuns
        counters.allTestRuns += 1
        return previous
    }
}

/// Tries to add one to the retry-runs counter, bounded by `max`.
///
/// - Parameter max: Upper bound passed to `checkedAdd(_:max:)`.
/// - Returns: The stored, updated retry count, or `nil` when `checkedAdd` yields `nil`
///   (presumably because the bound would be exceeded — confirm against `checkedAdd`);
///   in that case the counter is left untouched.
func incrementRetries(max: UInt) -> UInt? {
    _counters.update { counters in
        guard let updated = counters.retryTestRuns.checkedAdd(1, max: max) else {
            return nil
        }
        counters.retryTestRuns = updated
        return updated
    }
}

/// Adds one to the counter of new tests (the value exposed through `efdTestsCounts.newTests`).
///
/// - Returns: The new-tests count as it was before this increment. The return
///   expression is evaluated first and the `defer` block increments afterwards.
@discardableResult
func incrementNewTests() -> UInt {
    _counters.update { cnt in
        defer { cnt.newTests += 1 }
        // Return the counter that is being incremented. Returning `allTestRuns`
        // here reported an unrelated counter to the caller.
        return cnt.newTests
    }
}

/// Adds `count` to the known-tests counter.
///
/// - Parameter count: Number of tests to add.
func addExpectedTests(count: UInt) {
    _counters.update { $0.knownTests += count }
}

private func internalEnd(endTime: Date? = nil) {
duration = (endTime ?? DDTestMonitor.clock.now).timeIntervalSince(startTime).toNanoseconds

Expand Down Expand Up @@ -135,7 +185,7 @@ public class DDTestModule: NSObject, Encodable {
meta[DDUISettingsTags.uiSettingsModuleLocalization] = localization
meta[DDTestSessionTags.testCodeCoverageEnabled] = (DDTestMonitor.instance?.coverageHelper != nil) ? "true" : "false"

let itrSkipped = self.itrSkipped.value
let itrSkipped = self.skippedByITRTestsCount

if DDTestMonitor.instance?.itr != nil {
meta[DDTestSessionTags.testItrSkippingType] = DDTagValues.typeTest
Expand All @@ -156,6 +206,13 @@ public class DDTestModule: NSObject, Encodable {
meta[DDTestSessionTags.testItrSkipped] = "true"
}

if DDTestMonitor.instance?.efd != nil {
meta[DDEfdTags.testEfdEnabled] = "true"
}
if efdSessionFailed {
meta[DDEfdTags.testEfdAbortReason] = DDTagValues.efdAbortFaulty
}

DDTestMonitor.tracer.eventsExporter?.exportEvent(event: DDTestModuleEnvelope(self))
Log.debug("Exported module_end event moduleId: \(self.id)")

Expand All @@ -173,6 +230,21 @@ public class DDTestModule: NSObject, Encodable {
DDTestMonitor.tracer.flush()
DDTestMonitor.instance?.gitUploadQueue.waitUntilAllOperationsAreFinished()
}

/// Decides whether `test` should be treated as new for Early Flake Detection.
///
/// Returns `false` when the session has already been marked as failed, when the
/// test belongs to another bundle, or when `efd` provides no known tests or no
/// threshold. If the number of new tests exceeds the threshold both as an
/// absolute count and as a percentage of the total, the session is marked as
/// faulty and `false` is returned.
///
/// - Parameters:
///   - test: The test being checked.
///   - efd: The Early Flake Detection service, if available.
/// - Returns: `true` only when the session is healthy and the known-tests data reports the test as new.
func checkEfdStatus(for test: DDTest, efd: EarlyFlakeDetectionService?) -> Bool {
    if efdSessionFailed || test.module.bundleName != bundleName {
        return false
    }
    guard let knownTests = efd?.knownTests, let limit = efd?.faultySessionThreshold else {
        return false
    }

    // Use the larger of the known-tests total and the locally counted known tests.
    let current = efdTestsCounts
    let totalCount = max(Double(knownTests.testCount), Double(current.knownTests))
    let newCount = Double(current.newTests)
    let newPercentage = (newCount / totalCount) * 100.0

    // The same limit is applied as an absolute count and as a percentage.
    // The condition is kept non-negated so NaN comparisons behave as before.
    let withinLimit = newCount <= limit || newPercentage < limit
    if !withinLimit {
        Log.print("Early Flake Detection Faulty Session detected!")
        efdSessionFailed = true
        return false
    }

    return knownTests.isNew(test: test.name, in: test.suite.name, and: bundleName)
}
}

/// Public interface for DDTestModule
Expand Down Expand Up @@ -267,3 +339,13 @@ extension DDTestModule {
}
}
}

private extension DDTestModule {
/// Plain value type holding all per-module counters; every counter starts at zero.
struct Counters {
// Incremented by `incrementSkipped()`; read through `skippedByITRTestsCount`.
var skippedTests: UInt = 0
// Updated by `incrementRetries(max:)`; read through `retriedByATRRunsCount`.
var retryTestRuns: UInt = 0
// Incremented by `incrementTestRuns()`; read through `testRunsCount`.
var allTestRuns: UInt = 0
// Incremented by `incrementNewTests()`; read through `efdTestsCounts`.
var newTests: UInt = 0
// Increased by `addExpectedTests(count:)`; read through `efdTestsCounts`.
var knownTests: UInt = 0
}
}
Loading

0 comments on commit 68e1a33

Please sign in to comment.