From 6f54591520e3d2668401a7039188631fbfb03340 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 12:54:02 -0400 Subject: [PATCH 01/43] initial sketch --- pkg/detectioncaching/detectioncaching.go | 54 ++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 pkg/detectioncaching/detectioncaching.go diff --git a/pkg/detectioncaching/detectioncaching.go b/pkg/detectioncaching/detectioncaching.go new file mode 100644 index 000000000000..cacbde1414d9 --- /dev/null +++ b/pkg/detectioncaching/detectioncaching.go @@ -0,0 +1,54 @@ +package detectioncaching + +import ( + "github.com/trufflesecurity/trufflehog/v3/pkg/cache" + "github.com/trufflesecurity/trufflehog/v3/pkg/context" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" +) + +func FromDataCached( + ctx context.Context, + cache cache.Cache[*detectors.Result], + detector detectors.Detector, + verify bool, + data []byte, +) ([]detectors.Result, error) { + withoutVerification, err := detector.FromData(ctx, false, data) + if err != nil { + return nil, err + } + + if !verify { + return withoutVerification, nil + } + + everythingCached := false + var cachedResults []detectors.Result + for _, r := range withoutVerification { + if cacheHit, ok := cache.Get(cacheKey(r)); ok { + cachedResults = append(cachedResults, *cacheHit) + } else { + everythingCached = false + break + } + } + + if everythingCached { + return cachedResults, nil + } + + results, err := detector.FromData(ctx, true, data) + if err != nil { + return nil, err + } + + for _, r := range results { + cache.Set(cacheKey(r), &r) + } + + return results, nil +} + +func cacheKey(result detectors.Result) string { + return string(result.Raw) + string(result.RawV2) +} From 0a9a30b733e31192c84d23314d6212b94bc82261 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 13:01:42 -0400 Subject: [PATCH 02/43] allow forced cache misses --- pkg/detectioncaching/detectioncaching.go | 25 +++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/pkg/detectioncaching/detectioncaching.go b/pkg/detectioncaching/detectioncaching.go index cacbde1414d9..5149dae5ebfe 100644 --- a/pkg/detectioncaching/detectioncaching.go +++ b/pkg/detectioncaching/detectioncaching.go @@ -11,6 +11,7 @@ func FromDataCached( cache cache.Cache[*detectors.Result], detector detectors.Detector, verify bool, + forceCacheMiss bool, data []byte, ) ([]detectors.Result, error) { withoutVerification, err := detector.FromData(ctx, false, data) @@ -22,19 +23,21 @@ func FromDataCached( return withoutVerification, nil } - everythingCached := false - var cachedResults []detectors.Result - for _, r := range withoutVerification { - if cacheHit, ok := cache.Get(cacheKey(r)); ok { - cachedResults = append(cachedResults, *cacheHit) - } else { - everythingCached = false - break + if !forceCacheMiss { + everythingCached := false + var cachedResults []detectors.Result + for _, r := range withoutVerification { + if cacheHit, ok := cache.Get(cacheKey(r)); ok { + cachedResults = append(cachedResults, *cacheHit) + } else { + everythingCached = false + break + } } - } - if everythingCached { - return cachedResults, nil + if everythingCached { + return cachedResults, nil + } } results, err := detector.FromData(ctx, true, data) From 0f0917b029372389ccecd14bdbf57cd62f3ad241 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 13:12:09 -0400 Subject: [PATCH 03/43] rearrange --- pkg/detectioncaching/detectioncaching.go | 38 ++++++++++++++++-------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/pkg/detectioncaching/detectioncaching.go b/pkg/detectioncaching/detectioncaching.go index 5149dae5ebfe..f2bd6157de9f 100644 --- a/pkg/detectioncaching/detectioncaching.go +++ b/pkg/detectioncaching/detectioncaching.go @@ -23,23 +23,35 @@ func FromDataCached( return withoutVerification, nil } - if !forceCacheMiss { - everythingCached := false - var cachedResults []detectors.Result - for _, r := range withoutVerification { - if cacheHit, ok := cache.Get(cacheKey(r)); ok { - cachedResults = append(cachedResults, *cacheHit) - } else { - everythingCached = false - break - } - } + if forceCacheMiss { + return verifyAndCache(ctx, cache, detector, data) + } - if everythingCached { - return cachedResults, nil + everythingCached := false + var cachedResults []detectors.Result + for _, r := range withoutVerification { + if cacheHit, ok := cache.Get(cacheKey(r)); ok { + cachedResults = append(cachedResults, *cacheHit) + } else { + everythingCached = false + break } } + if everythingCached { + return cachedResults, nil + } + + return verifyAndCache(ctx, cache, detector, data) +} + +func verifyAndCache( + ctx context.Context, + cache cache.Cache[*detectors.Result], + detector detectors.Detector, + data []byte, +) ([]detectors.Result, error) { + results, err := detector.FromData(ctx, true, data) if err != nil { return nil, err From d074f6d787a1006da768abbd79b77d4eb7be8e97 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 13:21:27 -0400 Subject: [PATCH 04/43] plug into engine --- pkg/engine/engine.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 4e0ee318df07..cb7c727ea571 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -13,6 +13,9 @@ import ( "github.com/adrg/strutil" "github.com/adrg/strutil/metrics" lru "github.com/hashicorp/golang-lru/v2" + "github.com/trufflesecurity/trufflehog/v3/pkg/cache" + "github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectioncaching" "google.golang.org/protobuf/proto" "github.com/trufflesecurity/trufflehog/v3/pkg/common" @@ -152,9 +155,10 @@ type Config struct { // customization through various options and configurations. type Engine struct { // CLI flags. - concurrency int - decoders []decoders.Decoder - detectors []detectors.Detector + concurrency int + decoders []decoders.Decoder + detectors []detectors.Detector + detectionCache cache.Cache[*detectors.Result] // Any detectors configured to override sources' verification flags detectorVerificationOverrides map[config.DetectorID]bool @@ -219,6 +223,7 @@ func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) { concurrency: cfg.Concurrency, decoders: cfg.Decoders, detectors: cfg.Detectors, + detectionCache: simple.NewCache[*detectors.Result](), dispatcher: cfg.Dispatcher, verify: cfg.Verify, filterUnverified: cfg.FilterUnverified, @@ -1052,7 +1057,13 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) { t := time.AfterFunc(detectionTimeout+1*time.Second, func() { ctx.Logger().Error(nil, "a detector ignored the context timeout") }) - results, err := data.detector.Detector.FromData(ctx, data.chunk.Verify, matchBytes) + results, err := detectioncaching.FromDataCached( + ctx, + e.detectionCache, + data.detector.Detector, + data.chunk.Verify, + data.chunk.SecretID != 0, + matchBytes) t.Stop() cancel() if err != nil { From 6e9d76066d0e2df8cb28da50ce06072ea6060a60 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 15:32:42 -0400 Subject: [PATCH 05/43] clear stuff in cached copy --- pkg/detectioncaching/detectioncaching.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pkg/detectioncaching/detectioncaching.go b/pkg/detectioncaching/detectioncaching.go index f2bd6157de9f..668bbc5587dc 100644 --- a/pkg/detectioncaching/detectioncaching.go +++ b/pkg/detectioncaching/detectioncaching.go @@ -4,6 +4,7 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/cache" "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) func FromDataCached( @@ -31,7 +32,10 @@ func FromDataCached( var cachedResults []detectors.Result for _, r := range withoutVerification { if cacheHit, ok := cache.Get(cacheKey(r)); ok { - cachedResults = append(cachedResults, *cacheHit) + returnedCopy := *cacheHit + returnedCopy.Raw = r.Raw + returnedCopy.RawV2 = r.RawV2 + cachedResults = append(cachedResults, returnedCopy) } else { everythingCached = false break @@ -58,7 +62,13 @@ func verifyAndCache( } for _, r := range results { - cache.Set(cacheKey(r), &r) + copyForCaching := r + // Do not persist raw secret values in a long-lived cache + copyForCaching.Raw = nil + copyForCaching.RawV2 = nil + // Decoder type will be set later, so clear it out now to minimize the chance of accidentally cloning it + copyForCaching.DecoderType = detectorspb.DecoderType_UNKNOWN + cache.Set(cacheKey(r), ©ForCaching) } return results, nil From 61a70a31834ab090259404893f657abe69e702b6 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 16:23:20 -0400 Subject: [PATCH 06/43] fiddle with pointers more --- pkg/detectioncaching/detectioncaching.go | 50 +++++++++--------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/pkg/detectioncaching/detectioncaching.go b/pkg/detectioncaching/detectioncaching.go index 668bbc5587dc..e9807900833d 100644 --- a/pkg/detectioncaching/detectioncaching.go +++ b/pkg/detectioncaching/detectioncaching.go @@ -15,6 +15,7 @@ func FromDataCached( forceCacheMiss bool, data []byte, ) ([]detectors.Result, error) { + withoutVerification, err := detector.FromData(ctx, false, data) if err != nil { return nil, err @@ -24,44 +25,31 @@ func FromDataCached( return withoutVerification, nil } - if forceCacheMiss { - return verifyAndCache(ctx, cache, detector, data) - } - - everythingCached := false - var cachedResults []detectors.Result - for _, r := range withoutVerification { - if cacheHit, ok := cache.Get(cacheKey(r)); ok { - returnedCopy := *cacheHit - returnedCopy.Raw = r.Raw - returnedCopy.RawV2 = r.RawV2 - cachedResults = append(cachedResults, returnedCopy) - } else { - everythingCached = false - break + if !forceCacheMiss { + isEverythingCached := false + var fromCache []detectors.Result + for _, r := range withoutVerification { + if cacheHit, ok := cache.Get(cacheKey(r)); ok { + fromCache = append(fromCache, *cacheHit) + fromCache[len(fromCache)-1].Raw = r.Raw + fromCache[len(fromCache)-1].RawV2 = r.RawV2 + } else { + isEverythingCached = false + break + } } - } - if everythingCached { - return cachedResults, nil + if isEverythingCached { + return fromCache, nil + } } - return verifyAndCache(ctx, cache, detector, data) -} - -func verifyAndCache( - ctx context.Context, - cache cache.Cache[*detectors.Result], - detector detectors.Detector, - data []byte, -) ([]detectors.Result, error) { - - results, err := detector.FromData(ctx, true, data) + withVerification, err := detector.FromData(ctx, true, data) if err != nil { return nil, err } - for _, r := range results { + for _, r := range withVerification { copyForCaching := r // Do not persist raw secret values in a long-lived cache copyForCaching.Raw = nil @@ -71,7 +59,7 @@ func verifyAndCache( cache.Set(cacheKey(r), ©ForCaching) } - return results, nil + return withVerification, nil } func cacheKey(result detectors.Result) string { From 3e5ff9cd0e762a47e9c63b059a51b9f5ac40ca19 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 16:24:46 -0400 Subject: [PATCH 07/43] rename to verificationCache --- pkg/detectioncaching/detectioncaching.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/detectioncaching/detectioncaching.go b/pkg/detectioncaching/detectioncaching.go index e9807900833d..762b9e3cd603 100644 --- a/pkg/detectioncaching/detectioncaching.go +++ b/pkg/detectioncaching/detectioncaching.go @@ -9,7 +9,7 @@ import ( func FromDataCached( ctx context.Context, - cache cache.Cache[*detectors.Result], + verificationCache cache.Cache[*detectors.Result], detector detectors.Detector, verify bool, forceCacheMiss bool, @@ -29,7 +29,7 @@ func FromDataCached( isEverythingCached := false var fromCache []detectors.Result for _, r := range withoutVerification { - if cacheHit, ok := cache.Get(cacheKey(r)); ok { + if cacheHit, ok := verificationCache.Get(cacheKey(r)); ok { fromCache = append(fromCache, *cacheHit) fromCache[len(fromCache)-1].Raw = r.Raw fromCache[len(fromCache)-1].RawV2 = r.RawV2 @@ -56,7 +56,7 @@ func FromDataCached( copyForCaching.RawV2 = nil // Decoder type will be set later, so clear it out now to minimize the chance of accidentally cloning it copyForCaching.DecoderType = detectorspb.DecoderType_UNKNOWN - cache.Set(cacheKey(r), ©ForCaching) + verificationCache.Set(cacheKey(r), ©ForCaching) } return withVerification, nil From 5ac0139d387165eb9edbadc9542b3210259c2152 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 16:33:28 -0400 Subject: [PATCH 08/43] inject getCacheKey --- pkg/detectioncaching/detectioncaching.go | 9 +++------ pkg/engine/engine.go | 1 + 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pkg/detectioncaching/detectioncaching.go b/pkg/detectioncaching/detectioncaching.go index 762b9e3cd603..f6c80aa99877 100644 --- a/pkg/detectioncaching/detectioncaching.go +++ b/pkg/detectioncaching/detectioncaching.go @@ -10,6 +10,7 @@ import ( func FromDataCached( ctx context.Context, verificationCache cache.Cache[*detectors.Result], + getCacheKey func(result *detectors.Result) string, detector detectors.Detector, verify bool, forceCacheMiss bool, @@ -29,7 +30,7 @@ func FromDataCached( isEverythingCached := false var fromCache []detectors.Result for _, r := range withoutVerification { - if cacheHit, ok := verificationCache.Get(cacheKey(r)); ok { + if cacheHit, ok := verificationCache.Get(getCacheKey(&r)); ok { fromCache = append(fromCache, *cacheHit) fromCache[len(fromCache)-1].Raw = r.Raw fromCache[len(fromCache)-1].RawV2 = r.RawV2 @@ -56,12 +57,8 @@ func FromDataCached( copyForCaching.RawV2 = nil // Decoder type will be set later, so clear it out now to minimize the chance of accidentally cloning it copyForCaching.DecoderType = detectorspb.DecoderType_UNKNOWN - verificationCache.Set(cacheKey(r), ©ForCaching) + verificationCache.Set(getCacheKey(&r), ©ForCaching) } return withVerification, nil } - -func cacheKey(result detectors.Result) string { - return string(result.Raw) + string(result.RawV2) -} diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index cb7c727ea571..50985f387725 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -1060,6 +1060,7 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) { results, err := detectioncaching.FromDataCached( ctx, e.detectionCache, + func(r *detectors.Result) string { return string(r.Raw) + string(r.RawV2) }, data.detector.Detector, data.chunk.Verify, data.chunk.SecretID != 0, From a4e092cd2d48265ed02d2d96b4441fb77c42b5d0 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 17:53:07 -0400 Subject: [PATCH 09/43] optimize when forcing a cache miss --- pkg/detectioncaching/detectioncaching.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pkg/detectioncaching/detectioncaching.go b/pkg/detectioncaching/detectioncaching.go index f6c80aa99877..042847802b1b 100644 --- a/pkg/detectioncaching/detectioncaching.go +++ b/pkg/detectioncaching/detectioncaching.go @@ -17,16 +17,16 @@ func FromDataCached( data []byte, ) ([]detectors.Result, error) { - withoutVerification, err := detector.FromData(ctx, false, data) - if err != nil { - return nil, err - } + if !forceCacheMiss { + withoutVerification, err := detector.FromData(ctx, false, data) + if err != nil { + return nil, err + } - if !verify { - return withoutVerification, nil - } + if !verify { + return withoutVerification, nil + } - if !forceCacheMiss { isEverythingCached := false var fromCache []detectors.Result for _, r := range withoutVerification { @@ -45,7 +45,7 @@ func FromDataCached( } } - withVerification, err := detector.FromData(ctx, true, data) + withVerification, err := detector.FromData(ctx, verify, data) if err != nil { return nil, err } From 8d83daa02cc51413be2855637661db040310fa63 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 18:46:38 -0400 Subject: [PATCH 10/43] tweak --- pkg/detectioncaching/detectioncaching.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pkg/detectioncaching/detectioncaching.go b/pkg/detectioncaching/detectioncaching.go index 042847802b1b..78bae083dfb9 100644 --- a/pkg/detectioncaching/detectioncaching.go +++ b/pkg/detectioncaching/detectioncaching.go @@ -7,6 +7,21 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) +// FromDataCached executes detection on chunk data in a way that uses a provided verification cache to deduplicate +// verification requests when possible. +// +// If the provided cache is nil, all caching logic is skipped, and this function returns the result of the provided +// detector's FromData method. +// +// Otherwise, if forceCacheMiss is true, the provided detector's FromData method is called, and the results are used to +// update the cache before being returned. +// +// Otherwise, if verify is false, all caching logic is skipped, and this function returns the result of the provided +// detector's FromData method. +// +// Otherwise, detection proceeds in two stages. First, detection is executed without verification. Then, the cache is +// queried for each returned result. If all results are cache hits, then their cached values are returned. Otherwise, +// detection is executed again with verification, and the results are used to update the cache before being returned. func FromDataCached( ctx context.Context, verificationCache cache.Cache[*detectors.Result], @@ -17,6 +32,10 @@ func FromDataCached( data []byte, ) ([]detectors.Result, error) { + if verificationCache == nil { + return detector.FromData(ctx, verify, data) + } + if !forceCacheMiss { withoutVerification, err := detector.FromData(ctx, false, data) if err != nil { From fd583806ed90d8f5a918f23c209c19eb44b6634e Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 19:00:03 -0400 Subject: [PATCH 11/43] tweak more --- pkg/detectioncaching/detectioncaching.go | 22 +++++++++++----------- pkg/engine/engine.go | 5 ++--- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/pkg/detectioncaching/detectioncaching.go b/pkg/detectioncaching/detectioncaching.go index 78bae083dfb9..8d27778b7c20 100644 --- a/pkg/detectioncaching/detectioncaching.go +++ b/pkg/detectioncaching/detectioncaching.go @@ -10,25 +10,25 @@ import ( // FromDataCached executes detection on chunk data in a way that uses a provided verification cache to deduplicate // verification requests when possible. // -// If the provided cache is nil, all caching logic is skipped, and this function returns the result of the provided -// detector's FromData method. +// If the provided cache is nil, this function simply returns the result of the provided detector's FromData method. // -// Otherwise, if forceCacheMiss is true, the provided detector's FromData method is called, and the results are used to -// update the cache before being returned. +// If verify is false, this function returns the result of the provided detector's FromData method. In this case, the +// cache is only updated if forceCacheUpdate is true. // -// Otherwise, if verify is false, all caching logic is skipped, and this function returns the result of the provided -// detector's FromData method. +// If verify is true, and forceCacheUpdate is false, this function first executes the provided detector's FromData +// method with verification disabled. Then, the cache is queried for each result. If they are all present in the cache, +// the cached values are returned. Otherwise, the provided detector's FromData method is invoked (again) with +// verification enabled, and the results are used to update the cache before being returned. // -// Otherwise, detection proceeds in two stages. First, detection is executed without verification. Then, the cache is -// queried for each returned result. If all results are cache hits, then their cached values are returned. Otherwise, -// detection is executed again with verification, and the results are used to update the cache before being returned. +// If verify is true, and forceCacheUpdate is also true, the provided detector's FromData method is invoked, and the +// results are used to update the cache before being returned. func FromDataCached( ctx context.Context, verificationCache cache.Cache[*detectors.Result], getCacheKey func(result *detectors.Result) string, detector detectors.Detector, verify bool, - forceCacheMiss bool, + forceCacheUpdate bool, data []byte, ) ([]detectors.Result, error) { @@ -36,7 +36,7 @@ func FromDataCached( return detector.FromData(ctx, verify, data) } - if !forceCacheMiss { + if !forceCacheUpdate { withoutVerification, err := detector.FromData(ctx, false, data) if err != nil { return nil, err diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 50985f387725..98a747c8790e 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -14,7 +14,6 @@ import ( "github.com/adrg/strutil/metrics" lru "github.com/hashicorp/golang-lru/v2" "github.com/trufflesecurity/trufflehog/v3/pkg/cache" - "github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple" "github.com/trufflesecurity/trufflehog/v3/pkg/detectioncaching" "google.golang.org/protobuf/proto" @@ -223,7 +222,7 @@ func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) { concurrency: cfg.Concurrency, decoders: cfg.Decoders, detectors: cfg.Detectors, - detectionCache: simple.NewCache[*detectors.Result](), + detectionCache: nil, dispatcher: cfg.Dispatcher, verify: cfg.Verify, filterUnverified: cfg.FilterUnverified, @@ -1060,7 +1059,7 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) { results, err := detectioncaching.FromDataCached( ctx, e.detectionCache, - func(r *detectors.Result) string { return string(r.Raw) + string(r.RawV2) }, + func(r *detectors.Result) string { panic("cache should be ignored") }, data.detector.Detector, data.chunk.Verify, data.chunk.SecretID != 0, From c119f2b6d7d5e192991fc30373d5f2484980e313 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 19:41:22 -0400 Subject: [PATCH 12/43] flag when cache was used --- pkg/detectioncaching/detectioncaching.go | 1 + pkg/detectors/detectors.go | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/detectioncaching/detectioncaching.go b/pkg/detectioncaching/detectioncaching.go index 8d27778b7c20..b2f8fc71a5a0 100644 --- a/pkg/detectioncaching/detectioncaching.go +++ b/pkg/detectioncaching/detectioncaching.go @@ -53,6 +53,7 @@ func FromDataCached( fromCache = append(fromCache, *cacheHit) fromCache[len(fromCache)-1].Raw = r.Raw fromCache[len(fromCache)-1].RawV2 = r.RawV2 + fromCache[len(fromCache)-1].VerificationFromCache = true } else { isEverythingCached = false break diff --git a/pkg/detectors/detectors.go b/pkg/detectors/detectors.go index 71fc8514f452..610ecfbc8c0a 100644 --- a/pkg/detectors/detectors.go +++ b/pkg/detectors/detectors.go @@ -90,8 +90,9 @@ type Result struct { // DetectorName is the name of the Detector. Used for custom detectors. DetectorName string // DecoderType is the type of Decoder. - DecoderType detectorspb.DecoderType - Verified bool + DecoderType detectorspb.DecoderType + Verified bool + VerificationFromCache bool // Raw contains the raw secret identifier data. Prefer IDs over secrets since it is used for deduping after hashing. Raw []byte // RawV2 contains the raw secret identifier that is a combination of both the ID and the secret. From 15ddd6335adbc517cc7983a4c42cf553853c8430 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 19:42:58 -0400 Subject: [PATCH 13/43] store key builder in engine --- pkg/engine/engine.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 98a747c8790e..5d559a3a2883 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -154,10 +154,11 @@ type Config struct { // customization through various options and configurations. type Engine struct { // CLI flags. - concurrency int - decoders []decoders.Decoder - detectors []detectors.Detector - detectionCache cache.Cache[*detectors.Result] + concurrency int + decoders []decoders.Decoder + detectors []detectors.Detector + detectionCache cache.Cache[*detectors.Result] + getDetectionCacheKey func(result *detectors.Result) string // Any detectors configured to override sources' verification flags detectorVerificationOverrides map[config.DetectorID]bool @@ -223,6 +224,7 @@ func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) { decoders: cfg.Decoders, detectors: cfg.Detectors, detectionCache: nil, + getDetectionCacheKey: func(result *detectors.Result) string { panic("cache should be unused") }, dispatcher: cfg.Dispatcher, verify: cfg.Verify, filterUnverified: cfg.FilterUnverified, @@ -1059,7 +1061,7 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) { results, err := detectioncaching.FromDataCached( ctx, e.detectionCache, - func(r *detectors.Result) string { panic("cache should be ignored") }, + e.getDetectionCacheKey, data.detector.Detector, data.chunk.Verify, data.chunk.SecretID != 0, From ea3a5593ba2ee5b25925223ccfca53c75c255b53 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 19:49:29 -0400 Subject: [PATCH 14/43] rename --- pkg/engine/engine.go | 24 ++++++++++--------- .../verificationcaching.go} | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) rename pkg/{detectioncaching/detectioncaching.go => verificationcaching/verificationcaching.go} (99%) diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 5d559a3a2883..b2d1f7f28745 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -14,7 +14,7 @@ import ( "github.com/adrg/strutil/metrics" lru "github.com/hashicorp/golang-lru/v2" "github.com/trufflesecurity/trufflehog/v3/pkg/cache" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectioncaching" + "github.com/trufflesecurity/trufflehog/v3/pkg/verificationcaching" "google.golang.org/protobuf/proto" "github.com/trufflesecurity/trufflehog/v3/pkg/common" @@ -154,11 +154,13 @@ type Config struct { // customization through various options and configurations. type Engine struct { // CLI flags. - concurrency int - decoders []decoders.Decoder - detectors []detectors.Detector - detectionCache cache.Cache[*detectors.Result] - getDetectionCacheKey func(result *detectors.Result) string + concurrency int + decoders []decoders.Decoder + detectors []detectors.Detector + // verificationCache must be thread-safe + verificationCache cache.Cache[*detectors.Result] + // getVerificationCacheKey must be thread-safe + getVerificationCacheKey func(result *detectors.Result) string // Any detectors configured to override sources' verification flags detectorVerificationOverrides map[config.DetectorID]bool @@ -223,8 +225,8 @@ func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) { concurrency: cfg.Concurrency, decoders: cfg.Decoders, detectors: cfg.Detectors, - detectionCache: nil, - getDetectionCacheKey: func(result *detectors.Result) string { panic("cache should be unused") }, + verificationCache: nil, + getVerificationCacheKey: func(result *detectors.Result) string { panic("cache should be unused") }, dispatcher: cfg.Dispatcher, verify: cfg.Verify, filterUnverified: cfg.FilterUnverified, @@ -1058,10 +1060,10 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) { t := time.AfterFunc(detectionTimeout+1*time.Second, func() { ctx.Logger().Error(nil, "a detector ignored the context timeout") }) - results, err := detectioncaching.FromDataCached( + results, err := verificationcaching.FromDataCached( ctx, - e.detectionCache, - e.getDetectionCacheKey, + e.verificationCache, + e.getVerificationCacheKey, data.detector.Detector, data.chunk.Verify, data.chunk.SecretID != 0, diff --git a/pkg/detectioncaching/detectioncaching.go b/pkg/verificationcaching/verificationcaching.go similarity index 99% rename from pkg/detectioncaching/detectioncaching.go rename to pkg/verificationcaching/verificationcaching.go index b2f8fc71a5a0..68ab06956575 100644 --- a/pkg/detectioncaching/detectioncaching.go +++ b/pkg/verificationcaching/verificationcaching.go @@ -1,4 +1,4 @@ -package detectioncaching +package verificationcaching import ( "github.com/trufflesecurity/trufflehog/v3/pkg/cache" From bf3170d8660aa4b5bbc485c33969bbecc36c283f Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 17 Oct 2024 19:57:21 -0400 Subject: [PATCH 15/43] copy verification errors --- pkg/detectors/detectors.go | 10 +++++++++- .../verificationcaching.go | 20 ++++++++----------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/pkg/detectors/detectors.go b/pkg/detectors/detectors.go index 610ecfbc8c0a..ac87fc29a59d 100644 --- a/pkg/detectors/detectors.go +++ b/pkg/detectors/detectors.go @@ -114,7 +114,15 @@ type Result struct { AnalysisInfo map[string]string } -// SetVerificationError is the only way to set a verification error. Any sensitive values should be passed-in as secrets to be redacted. +// CopyVerificationInfo clones verification info (status and error) from another Result struct. This is used when +// loading verification info from a verification cache. (A method is necessary because verification errors are not +// exported, to prevent the accidental storage of sensitive information in them.) +func (r *Result) CopyVerificationInfo(from *Result) { + r.Verified = from.Verified + r.verificationError = from.verificationError +} + +// SetVerificationError is the only way to set a new verification error. Any sensitive values should be passed-in as secrets to be redacted. func (r *Result) SetVerificationError(err error, secrets ...string) { if err != nil { r.verificationError = redactSecrets(err, secrets...) diff --git a/pkg/verificationcaching/verificationcaching.go b/pkg/verificationcaching/verificationcaching.go index 68ab06956575..8f217a87ef26 100644 --- a/pkg/verificationcaching/verificationcaching.go +++ b/pkg/verificationcaching/verificationcaching.go @@ -37,23 +37,19 @@ func FromDataCached( } if !forceCacheUpdate { - withoutVerification, err := detector.FromData(ctx, false, data) + withoutRemoteVerification, err := detector.FromData(ctx, false, data) if err != nil { return nil, err } if !verify { - return withoutVerification, nil + return withoutRemoteVerification, nil } isEverythingCached := false - var fromCache []detectors.Result - for _, r := range withoutVerification { + for _, r := range withoutRemoteVerification { if cacheHit, ok := verificationCache.Get(getCacheKey(&r)); ok { - fromCache = append(fromCache, *cacheHit) - fromCache[len(fromCache)-1].Raw = r.Raw - fromCache[len(fromCache)-1].RawV2 = r.RawV2 - fromCache[len(fromCache)-1].VerificationFromCache = true + r.CopyVerificationInfo(cacheHit) } else { isEverythingCached = false break @@ -61,16 +57,16 @@ func FromDataCached( } if isEverythingCached { - return fromCache, nil + return withoutRemoteVerification, nil } } - withVerification, err := detector.FromData(ctx, verify, data) + withRemoteVerification, err := detector.FromData(ctx, verify, data) if err != nil { return nil, err } - for _, r := range withVerification { + for _, r := range withRemoteVerification { copyForCaching := r // Do not persist raw secret values in a long-lived cache copyForCaching.Raw = nil @@ -80,5 +76,5 @@ func FromDataCached( verificationCache.Set(getCacheKey(&r), ©ForCaching) } - return withVerification, nil + return withRemoteVerification, nil } From b1265697c25fd8be27d7bd8021bc2a34eb94793a Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Mon, 2 Dec 2024 13:30:02 -0800 Subject: [PATCH 16/43] re-remove decodertype --- pkg/detectors/detectors.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/detectors/detectors.go b/pkg/detectors/detectors.go index 9b4064fd8275..9b3a4ffed841 100644 --- a/pkg/detectors/detectors.go +++ b/pkg/detectors/detectors.go @@ -89,9 +89,9 @@ type Result struct { DetectorType detectorspb.DetectorType // DetectorName is the name of the Detector. Used for custom detectors. DetectorName string - // DecoderType is the type of Decoder. - DecoderType detectorspb.DecoderType - Verified bool + Verified bool + // VerificationFromCache indicates whether this result's verification result came from the verification cache rather + // than an actual remote request. VerificationFromCache bool // Raw contains the raw secret identifier data. Prefer IDs over secrets since it is used for deduping after hashing. Raw []byte From 8ef29ec50fa7be54d7bbeedea9e736819a6bd347 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Mon, 2 Dec 2024 13:31:26 -0800 Subject: [PATCH 17/43] remove cached decoder type --- pkg/verificationcaching/verificationcaching.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/pkg/verificationcaching/verificationcaching.go b/pkg/verificationcaching/verificationcaching.go index 8f217a87ef26..576f8aec40f1 100644 --- a/pkg/verificationcaching/verificationcaching.go +++ b/pkg/verificationcaching/verificationcaching.go @@ -4,7 +4,6 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/cache" "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) // FromDataCached executes detection on chunk data in a way that uses a provided verification cache to deduplicate @@ -71,8 +70,6 @@ func FromDataCached( // Do not persist raw secret values in a long-lived cache copyForCaching.Raw = nil copyForCaching.RawV2 = nil - // Decoder type will be set later, so clear it out now to minimize the chance of accidentally cloning it - copyForCaching.DecoderType = detectorspb.DecoderType_UNKNOWN verificationCache.Set(getCacheKey(&r), ©ForCaching) } From 20aa4eabde9833136f34c6a54bc56d9979fece67 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Fri, 6 Dec 2024 15:55:13 -0500 Subject: [PATCH 18/43] add tests and fix bugs --- .../verificationcaching.go | 11 +- .../verificationcaching_test.go | 210 ++++++++++++++++++ 2 files changed, 216 insertions(+), 5 deletions(-) create mode 100644 pkg/verificationcaching/verificationcaching_test.go diff --git a/pkg/verificationcaching/verificationcaching.go b/pkg/verificationcaching/verificationcaching.go index 576f8aec40f1..4befcf6aec96 100644 --- a/pkg/verificationcaching/verificationcaching.go +++ b/pkg/verificationcaching/verificationcaching.go @@ -23,7 +23,7 @@ import ( // results are used to update the cache before being returned. func FromDataCached( ctx context.Context, - verificationCache cache.Cache[*detectors.Result], + verificationCache cache.Cache[detectors.Result], getCacheKey func(result *detectors.Result) string, detector detectors.Detector, verify bool, @@ -45,10 +45,11 @@ func FromDataCached( return withoutRemoteVerification, nil } - isEverythingCached := false - for _, r := range withoutRemoteVerification { + isEverythingCached := true + for i, r := range withoutRemoteVerification { if cacheHit, ok := verificationCache.Get(getCacheKey(&r)); ok { - r.CopyVerificationInfo(cacheHit) + withoutRemoteVerification[i].CopyVerificationInfo(&cacheHit) + withoutRemoteVerification[i].VerificationFromCache = true } else { isEverythingCached = false break @@ -70,7 +71,7 @@ func FromDataCached( // Do not persist raw secret values in a long-lived cache copyForCaching.Raw = nil copyForCaching.RawV2 = nil - verificationCache.Set(getCacheKey(&r), ©ForCaching) + verificationCache.Set(getCacheKey(&r), copyForCaching) } return withRemoteVerification, nil diff --git a/pkg/verificationcaching/verificationcaching_test.go b/pkg/verificationcaching/verificationcaching_test.go new file mode 100644 index 000000000000..072780f9f9c5 --- /dev/null +++ b/pkg/verificationcaching/verificationcaching_test.go @@ -0,0 +1,210 @@ +package verificationcaching + +import ( + "context" + "errors" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple" + logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +type testDetector struct { + fromDataCallCount int + results []detectors.Result +} + +func (t *testDetector) FromData(_ context.Context, verify bool, _ []byte) ([]detectors.Result, error) { + t.fromDataCallCount = t.fromDataCallCount + 1 + var results []detectors.Result + for _, r := range t.results { + copy := detectors.Result{Redacted: r.Redacted, Raw: r.Raw, RawV2: r.RawV2} + if verify { + copy.CopyVerificationInfo(&r) + } + results = append(results, copy) + } + return results, nil +} + +func (t *testDetector) Keywords() []string { return nil } +func (t *testDetector) Type() detectorspb.DetectorType { return -1 } +func (t *testDetector) Description() string { return "" } + +var _ detectors.Detector = (*testDetector)(nil) + +func getCacheKey(result *detectors.Result) string { + return string(result.Raw) +} + +func TestFromDataCached_NilCache(t *testing.T) { + detector := testDetector{results: []detectors.Result{ + {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: true}, + }} + + require.NotPanics(t, func() { + results, err := FromDataCached( + logContext.Background(), + nil, + func(result *detectors.Result) string { panic("shouldn't happen") }, + &detector, + true, + true, + nil) + + require.NoError(t, err) + assert.Equal(t, 1, detector.fromDataCallCount) + assert.ElementsMatch(t, detector.results, results) + }) +} + +func TestFromDataCached_VerifyFalseForceCacheUpdateFalse(t *testing.T) { + detector := testDetector{results: []detectors.Result{ + {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: true}, + }} + cache := simple.NewCache[detectors.Result]() + + results, err := FromDataCached( + logContext.Background(), + cache, + getCacheKey, + &detector, + false, + false, + nil) + + require.NoError(t, err) + assert.Equal(t, 1, detector.fromDataCallCount) + assert.ElementsMatch(t, []detectors.Result{ + {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: false}, + }, results) + assert.Empty(t, cache.Values()) +} + +func TestFromDataCached_VerifyFalseForceCacheUpdateTrue(t *testing.T) { + detector := testDetector{results: []detectors.Result{ + {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: true}, + {Redacted: "world", Raw: []byte("world"), RawV2: []byte("worldV2"), Verified: false}, + }} + detector.results[1].SetVerificationError(errors.New("test verification error")) + cache := simple.NewCache[detectors.Result]() + + results, err := FromDataCached( + logContext.Background(), + cache, + getCacheKey, + &detector, + false, + true, + nil) + + require.NoError(t, err) + assert.Equal(t, 1, detector.fromDataCallCount) + assert.ElementsMatch(t, []detectors.Result{ + {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: false}, + {Redacted: "world", Raw: []byte("world"), RawV2: []byte("worldV2"), Verified: false}, + }, results) + assert.ElementsMatch(t, []detectors.Result{{Redacted: "hello"}, {Redacted: "world"}}, cache.Values()) +} + +func TestFromDataCached_VerifyTrueForceCacheUpdateFalseAllCacheHits(t *testing.T) { + remoteResults := []detectors.Result{ + {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: true}, + {Redacted: "world", Raw: []byte("world"), RawV2: []byte("worldV2"), Verified: false}, + } + remoteResults[1].SetVerificationError(errors.New("test verification error")) + detector := testDetector{results: remoteResults} + cacheData := []detectors.Result{ + {Redacted: "hello", Verified: false}, + {Redacted: "world", Verified: true}, + } + cacheData[0].SetVerificationError(errors.New("test verification error")) + cache := simple.NewCache[detectors.Result]() + cache.Set("hello", cacheData[0]) + cache.Set("world", cacheData[1]) + + results, err := FromDataCached( + logContext.Background(), + cache, + getCacheKey, + &detector, + true, + false, + nil) + + require.NoError(t, err) + assert.Equal(t, 1, detector.fromDataCallCount) + wantResults := []detectors.Result{ + {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: false, VerificationFromCache: true}, + {Redacted: "world", Raw: []byte("world"), RawV2: []byte("worldV2"), Verified: true, VerificationFromCache: true}, + } + wantResults[0].SetVerificationError(errors.New("test verification error")) + assert.ElementsMatch(t, wantResults, results) + assert.ElementsMatch(t, cacheData, cache.Values()) +} + +func TestFromDataCached_VerifyTrueForceCacheUpdateFalseCacheMiss(t *testing.T) { + detector := testDetector{results: []detectors.Result{ + {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: true}, + {Redacted: "world", Raw: []byte("world"), RawV2: []byte("worldV2"), Verified: false}, + }} + detector.results[1].SetVerificationError(errors.New("test verification error")) + cacheData := []detectors.Result{ + {Redacted: "hello", Verified: false}, + } + cacheData[0].SetVerificationError(errors.New("test verification error")) + cache := simple.NewCacheWithData([]simple.CacheEntry[detectors.Result]{{Key: "hello", Value: cacheData[0]}}) + + results, err := FromDataCached( + logContext.Background(), + cache, + getCacheKey, + &detector, + true, + false, + nil) + + require.NoError(t, err) + assert.Equal(t, 2, detector.fromDataCallCount) + assert.ElementsMatch(t, detector.results, results) + wantCacheData := []detectors.Result{ + {Redacted: "hello", Verified: true}, + {Redacted: "world", Verified: false}, + } + wantCacheData[1].SetVerificationError(errors.New("test verification error")) + assert.ElementsMatch(t, wantCacheData, cache.Values()) +} + +func TestFromDataCached_VerifyTrueForceCacheUpdateTrue(t *testing.T) { + detector := testDetector{results: []detectors.Result{ + {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: true}, + {Redacted: "world", Raw: []byte("world"), RawV2: []byte("worldV2"), Verified: false}, + }} + detector.results[1].SetVerificationError(errors.New("test verification error")) + cache := simple.NewCache[detectors.Result]() + cache.Set("hello", detectors.Result{Redacted: "hello", Verified: false}) + cache.Set("world", detectors.Result{Redacted: "world", Verified: true}) + + results, err := FromDataCached( + logContext.Background(), + cache, + getCacheKey, + &detector, + true, + true, + nil) + + require.NoError(t, err) + assert.Equal(t, 1, detector.fromDataCallCount) + assert.ElementsMatch(t, detector.results, results) + wantCacheData := []detectors.Result{ + {Redacted: "hello", Verified: true}, + {Redacted: "world", Verified: false}, + } + wantCacheData[1].SetVerificationError(errors.New("test verification error")) + assert.ElementsMatch(t, wantCacheData, cache.Values()) +} From 328e121b359138a88082cbbcd8199a8f66f7f120 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Fri, 6 Dec 2024 16:00:45 -0500 Subject: [PATCH 19/43] tweak formatting --- pkg/verificationcaching/verificationcaching_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/verificationcaching/verificationcaching_test.go b/pkg/verificationcaching/verificationcaching_test.go index 072780f9f9c5..46a0adbda4f7 100644 --- a/pkg/verificationcaching/verificationcaching_test.go +++ b/pkg/verificationcaching/verificationcaching_test.go @@ -108,7 +108,10 @@ func TestFromDataCached_VerifyFalseForceCacheUpdateTrue(t *testing.T) { {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: false}, {Redacted: "world", Raw: []byte("world"), RawV2: []byte("worldV2"), Verified: false}, }, results) - assert.ElementsMatch(t, []detectors.Result{{Redacted: "hello"}, {Redacted: "world"}}, cache.Values()) + assert.ElementsMatch(t, []detectors.Result{ + {Redacted: "hello", Verified: false}, + {Redacted: "world", Verified: false}, + }, cache.Values()) } func TestFromDataCached_VerifyTrueForceCacheUpdateFalseAllCacheHits(t *testing.T) { From 817ea15bde1049959be1c7d91b86edec399496da Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Fri, 6 Dec 2024 16:03:05 -0500 Subject: [PATCH 20/43] update engine --- pkg/engine/engine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 59ecbe38f266..4bd2dc0179db 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -159,7 +159,7 @@ type Engine struct { decoders []decoders.Decoder detectors []detectors.Detector // verificationCache must be thread-safe - verificationCache cache.Cache[*detectors.Result] + verificationCache cache.Cache[detectors.Result] // getVerificationCacheKey must be thread-safe getVerificationCacheKey func(result *detectors.Result) string // Any detectors configured to override sources' verification flags From ca07df29d8f876e5c1222af096a8bc936c7fbc9a Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Fri, 6 Dec 2024 16:04:44 -0500 Subject: [PATCH 21/43] calculate cache key by value --- pkg/engine/engine.go | 4 ++-- pkg/verificationcaching/verificationcaching.go | 6 +++--- pkg/verificationcaching/verificationcaching_test.go | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 4bd2dc0179db..5e9e59cb1b03 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -161,7 +161,7 @@ type Engine struct { // verificationCache must be thread-safe verificationCache cache.Cache[detectors.Result] // getVerificationCacheKey must be thread-safe - getVerificationCacheKey func(result *detectors.Result) string + getVerificationCacheKey func(result detectors.Result) string // Any detectors configured to override sources' verification flags detectorVerificationOverrides map[config.DetectorID]bool @@ -227,7 +227,7 @@ func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) { decoders: cfg.Decoders, detectors: cfg.Detectors, verificationCache: nil, - getVerificationCacheKey: func(result *detectors.Result) string { panic("cache should be unused") }, + getVerificationCacheKey: func(result detectors.Result) string { panic("cache should be unused") }, dispatcher: cfg.Dispatcher, verify: cfg.Verify, filterUnverified: cfg.FilterUnverified, diff --git a/pkg/verificationcaching/verificationcaching.go b/pkg/verificationcaching/verificationcaching.go index 4befcf6aec96..44cf739d9792 100644 --- a/pkg/verificationcaching/verificationcaching.go +++ b/pkg/verificationcaching/verificationcaching.go @@ -24,7 +24,7 @@ import ( func FromDataCached( ctx context.Context, verificationCache cache.Cache[detectors.Result], - getCacheKey func(result *detectors.Result) string, + getCacheKey func(result detectors.Result) string, detector detectors.Detector, verify bool, forceCacheUpdate bool, @@ -47,7 +47,7 @@ func FromDataCached( isEverythingCached := true for i, r := range withoutRemoteVerification { - if cacheHit, ok := verificationCache.Get(getCacheKey(&r)); ok { + if cacheHit, ok := verificationCache.Get(getCacheKey(r)); ok { withoutRemoteVerification[i].CopyVerificationInfo(&cacheHit) withoutRemoteVerification[i].VerificationFromCache = true } else { @@ -71,7 +71,7 @@ func FromDataCached( // Do not persist raw secret values in a long-lived cache copyForCaching.Raw = nil copyForCaching.RawV2 = nil - verificationCache.Set(getCacheKey(&r), copyForCaching) + verificationCache.Set(getCacheKey(r), copyForCaching) } return withRemoteVerification, nil diff --git a/pkg/verificationcaching/verificationcaching_test.go b/pkg/verificationcaching/verificationcaching_test.go index 46a0adbda4f7..d7bb09c73a20 100644 --- a/pkg/verificationcaching/verificationcaching_test.go +++ b/pkg/verificationcaching/verificationcaching_test.go @@ -37,7 +37,7 @@ func (t *testDetector) Description() string { return "" } var _ detectors.Detector = (*testDetector)(nil) -func getCacheKey(result *detectors.Result) string { +func getCacheKey(result detectors.Result) string { return string(result.Raw) } @@ -50,7 +50,7 @@ func TestFromDataCached_NilCache(t *testing.T) { results, err := FromDataCached( logContext.Background(), nil, - func(result *detectors.Result) string { panic("shouldn't happen") }, + func(result detectors.Result) string { panic("shouldn't happen") }, &detector, true, true, From e2924dea3a9de272a2b1f126f70e208fa654ebcc Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Fri, 6 Dec 2024 16:06:46 -0500 Subject: [PATCH 22/43] update comment --- pkg/engine/engine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 5e9e59cb1b03..6fc779582cea 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -158,7 +158,7 @@ type Engine struct { concurrency int decoders []decoders.Decoder detectors []detectors.Detector - // verificationCache must be thread-safe + // verificationCache must be thread-safe. Leave nil to disable verification caching. verificationCache cache.Cache[detectors.Result] // getVerificationCacheKey must be thread-safe getVerificationCacheKey func(result detectors.Result) string From d5f8f31c97980e1c840a4e3344f45abe0473eddc Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Wed, 18 Dec 2024 14:38:21 -0800 Subject: [PATCH 23/43] add janky metrics + an actual cache --- main.go | 5 +++++ pkg/engine/engine.go | 5 +++-- pkg/verificationcaching/verificationcaching.go | 13 +++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index 4c5cf2982863..4d4d01860750 100644 --- a/main.go +++ b/main.go @@ -20,6 +20,7 @@ import ( "github.com/go-logr/logr" "github.com/jpillora/overseer" "github.com/mattn/go-isatty" + "github.com/trufflesecurity/trufflehog/v3/pkg/verificationcaching" "go.uber.org/automaxprocs/maxprocs" "github.com/trufflesecurity/trufflehog/v3/pkg/analyzer" @@ -526,6 +527,10 @@ func run(state overseer.State) { "unverified_secrets", metrics.UnverifiedSecretsFound, "scan_duration", metrics.ScanDuration.String(), "trufflehog_version", version.BuildVersion, + "verification_cache_hits", verificationcaching.CacheHits.Load(), + "verification_cache_misses", verificationcaching.CacheMisses.Load(), + "verification_cache_hits_wasted", verificationcaching.CacheHitsWasted.Load(), + "verification_cached_calls_saved", verificationcaching.VerificationCallsSaved.Load(), ) if metrics.hasFoundResults && *fail { diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 8e322ccdd23a..c79c8367e48d 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -14,6 +14,7 @@ import ( "github.com/adrg/strutil/metrics" lru "github.com/hashicorp/golang-lru/v2" "github.com/trufflesecurity/trufflehog/v3/pkg/cache" + "github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple" "github.com/trufflesecurity/trufflehog/v3/pkg/verificationcaching" "google.golang.org/protobuf/proto" @@ -226,8 +227,8 @@ func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) { concurrency: cfg.Concurrency, decoders: cfg.Decoders, detectors: cfg.Detectors, - verificationCache: nil, - getVerificationCacheKey: func(result detectors.Result) string { panic("cache should be unused") }, + verificationCache: simple.NewCache[detectors.Result](), + getVerificationCacheKey: func(result detectors.Result) string { return string(result.Raw) + string(result.RawV2) }, dispatcher: cfg.Dispatcher, verify: cfg.Verify, filterUnverified: cfg.FilterUnverified, diff --git a/pkg/verificationcaching/verificationcaching.go b/pkg/verificationcaching/verificationcaching.go index 44cf739d9792..91a57cee97ab 100644 --- a/pkg/verificationcaching/verificationcaching.go +++ b/pkg/verificationcaching/verificationcaching.go @@ -1,11 +1,18 @@ package verificationcaching import ( + "sync/atomic" + "github.com/trufflesecurity/trufflehog/v3/pkg/cache" "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" ) +var CacheHits atomic.Int32 +var CacheHitsWasted atomic.Int32 +var CacheMisses atomic.Int32 +var VerificationCallsSaved atomic.Int32 + // FromDataCached executes detection on chunk data in a way that uses a provided verification cache to deduplicate // verification requests when possible. // @@ -46,17 +53,23 @@ func FromDataCached( } isEverythingCached := true + var cacheHitsInCurrentChunk int32 for i, r := range withoutRemoteVerification { if cacheHit, ok := verificationCache.Get(getCacheKey(r)); ok { withoutRemoteVerification[i].CopyVerificationInfo(&cacheHit) withoutRemoteVerification[i].VerificationFromCache = true + CacheHits.Add(1) + cacheHitsInCurrentChunk++ } else { + CacheMisses.Add(1) isEverythingCached = false + CacheHitsWasted.Add(cacheHitsInCurrentChunk) break } } if isEverythingCached { + VerificationCallsSaved.Add(int32(len(withoutRemoteVerification))) return withoutRemoteVerification, nil } } From abaa8dc001a9add736de6e38154ebcb763cdcfcb Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Wed, 18 Dec 2024 14:43:25 -0800 Subject: [PATCH 24/43] fix typo --- main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.go b/main.go index 4d4d01860750..09e0d0497722 100644 --- a/main.go +++ b/main.go @@ -530,7 +530,7 @@ func run(state overseer.State) { "verification_cache_hits", verificationcaching.CacheHits.Load(), "verification_cache_misses", verificationcaching.CacheMisses.Load(), "verification_cache_hits_wasted", verificationcaching.CacheHitsWasted.Load(), - "verification_cached_calls_saved", verificationcaching.VerificationCallsSaved.Load(), + "verification_cache_calls_saved", verificationcaching.VerificationCallsSaved.Load(), ) if metrics.hasFoundResults && *fail { From e950bc945355f150d05046b8572386f0567288c2 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Wed, 18 Dec 2024 14:50:05 -0800 Subject: [PATCH 25/43] add flag --- main.go | 9 +++++++++ pkg/engine/engine.go | 8 +++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/main.go b/main.go index 09e0d0497722..df28537d0f18 100644 --- a/main.go +++ b/main.go @@ -20,6 +20,8 @@ import ( "github.com/go-logr/logr" "github.com/jpillora/overseer" "github.com/mattn/go-isatty" + "github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/verificationcaching" "go.uber.org/automaxprocs/maxprocs" @@ -77,6 +79,8 @@ var ( excludeDetectors = cli.Flag("exclude-detectors", "Comma separated list of detector types to exclude. Protobuf name or IDs may be used, as well as ranges. IDs defined here take precedence over the include list.").String() jobReportFile = cli.Flag("output-report", "Write a scan report to the provided path.").Hidden().OpenFile(os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666) + noVerificationCache = cli.Flag("no-verification-cache", "Disable verification caching").Bool() + // Add feature flags forceSkipBinaries = cli.Flag("force-skip-binaries", "Force skipping binaries.").Bool() forceSkipArchives = cli.Flag("force-skip-archives", "Force skipping archives.").Bool() @@ -502,6 +506,11 @@ func run(state overseer.State) { ShouldScanEntireChunk: *scanEntireChunk, } + if !*noVerificationCache { + engConf.VerificationCache = simple.NewCache[detectors.Result]() + engConf.GetVerificationCacheKey = func(result detectors.Result) string { return string(result.Raw) + string(result.RawV2) } + } + if *compareDetectionStrategies { if err := compareScans(ctx, cmd, engConf); err != nil { logFatal(err, "error comparing detection strategies") diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index c79c8367e48d..591285345850 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -14,7 +14,6 @@ import ( "github.com/adrg/strutil/metrics" lru "github.com/hashicorp/golang-lru/v2" "github.com/trufflesecurity/trufflehog/v3/pkg/cache" - "github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple" "github.com/trufflesecurity/trufflehog/v3/pkg/verificationcaching" "google.golang.org/protobuf/proto" @@ -148,6 +147,9 @@ type Config struct { // VerificationOverlapWorkerMultiplier is used to determine the number of verification overlap workers to spawn. VerificationOverlapWorkerMultiplier int + + VerificationCache cache.Cache[detectors.Result] + GetVerificationCacheKey func(result detectors.Result) string } // Engine represents the core scanning engine responsible for detecting secrets in input data. @@ -227,8 +229,8 @@ func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) { concurrency: cfg.Concurrency, decoders: cfg.Decoders, detectors: cfg.Detectors, - verificationCache: simple.NewCache[detectors.Result](), - getVerificationCacheKey: func(result detectors.Result) string { return string(result.Raw) + string(result.RawV2) }, + verificationCache: cfg.VerificationCache, + getVerificationCacheKey: cfg.GetVerificationCacheKey, dispatcher: cfg.Dispatcher, verify: cfg.Verify, filterUnverified: cfg.FilterUnverified, From add0c0ac24cb99436087ed15e6925cff813af7e3 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Wed, 18 Dec 2024 15:02:17 -0800 Subject: [PATCH 26/43] add to printers --- pkg/output/json.go | 38 ++++++++++++++++++++------------------ pkg/output/plain.go | 4 ++++ 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/pkg/output/json.go b/pkg/output/json.go index d98e78909cfa..8a99d3206157 100644 --- a/pkg/output/json.go +++ b/pkg/output/json.go @@ -40,9 +40,10 @@ func (p *JSONPrinter) Print(_ context.Context, r *detectors.ResultWithMetadata) // DetectorDescription is the description of the Detector. DetectorDescription string // DecoderName is the string name of the DecoderType. - DecoderName string - Verified bool - VerificationError string `json:",omitempty"` + DecoderName string + Verified bool + VerificationError string `json:",omitempty"` + VerificationFromCache bool // Raw contains the raw secret data. Raw string // RawV2 contains the raw secret identifier that is a combination of both the ID and the secret. @@ -54,21 +55,22 @@ func (p *JSONPrinter) Print(_ context.Context, r *detectors.ResultWithMetadata) ExtraData map[string]string StructuredData *detectorspb.StructuredData }{ - SourceMetadata: r.SourceMetadata, - SourceID: r.SourceID, - SourceType: r.SourceType, - SourceName: r.SourceName, - DetectorType: r.DetectorType, - DetectorName: r.DetectorType.String(), - DetectorDescription: r.DetectorDescription, - DecoderName: r.DecoderType.String(), - Verified: r.Verified, - VerificationError: verificationErr, - Raw: string(r.Raw), - RawV2: string(r.RawV2), - Redacted: r.Redacted, - ExtraData: r.ExtraData, - StructuredData: r.StructuredData, + SourceMetadata: r.SourceMetadata, + SourceID: r.SourceID, + SourceType: r.SourceType, + SourceName: r.SourceName, + DetectorType: r.DetectorType, + DetectorName: r.DetectorType.String(), + DetectorDescription: r.DetectorDescription, + DecoderName: r.DecoderType.String(), + Verified: r.Verified, + VerificationError: verificationErr, + VerificationFromCache: r.VerificationFromCache, + Raw: string(r.Raw), + RawV2: string(r.RawV2), + Redacted: r.Redacted, + ExtraData: r.ExtraData, + StructuredData: r.StructuredData, } out, err := json.Marshal(v) if err != nil { diff --git a/pkg/output/plain.go b/pkg/output/plain.go index c6a9871617cb..f67d9268abcf 100644 --- a/pkg/output/plain.go +++ b/pkg/output/plain.go @@ -22,6 +22,7 @@ var ( boldGreenPrinter = color.New(color.Bold, color.FgHiGreen) whitePrinter = color.New(color.FgWhite) boldWhitePrinter = color.New(color.Bold, color.FgWhite) + cyanPrinter = color.New(color.FgCyan) ) // PlainPrinter is a printer that prints results in plain text format. @@ -56,6 +57,9 @@ func (p *PlainPrinter) Print(_ context.Context, r *detectors.ResultWithMetadata) yellowPrinter.Printf("Verification issue: %s\n", out.VerificationError) } } + if r.VerificationFromCache { + cyanPrinter.Print("(Verification info cached)\n") + } printer.Printf("Detector Type: %s\n", out.DetectorType) printer.Printf("Decoder Type: %s\n", out.DecoderType) printer.Printf("Raw result: %s\n", whitePrinter.Sprint(out.Raw)) From 076696a8a3249febef503aa41a42071393f1b139 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Wed, 18 Dec 2024 15:49:54 -0800 Subject: [PATCH 27/43] add new metric --- main.go | 1 + pkg/verificationcaching/verificationcaching.go | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/main.go b/main.go index df28537d0f18..bd7c3f421107 100644 --- a/main.go +++ b/main.go @@ -540,6 +540,7 @@ func run(state overseer.State) { "verification_cache_misses", verificationcaching.CacheMisses.Load(), "verification_cache_hits_wasted", verificationcaching.CacheHitsWasted.Load(), "verification_cache_calls_saved", verificationcaching.VerificationCallsSaved.Load(), + "verification_time_spent", verificationcaching.VerificationTimeSpentMS.Load(), ) if metrics.hasFoundResults && *fail { diff --git a/pkg/verificationcaching/verificationcaching.go b/pkg/verificationcaching/verificationcaching.go index 91a57cee97ab..a9724599c939 100644 --- a/pkg/verificationcaching/verificationcaching.go +++ b/pkg/verificationcaching/verificationcaching.go @@ -2,6 +2,7 @@ package verificationcaching import ( "sync/atomic" + "time" "github.com/trufflesecurity/trufflehog/v3/pkg/cache" "github.com/trufflesecurity/trufflehog/v3/pkg/context" @@ -12,6 +13,7 @@ var CacheHits atomic.Int32 var CacheHitsWasted atomic.Int32 var CacheMisses atomic.Int32 var VerificationCallsSaved atomic.Int32 +var VerificationTimeSpentMS atomic.Int64 // FromDataCached executes detection on chunk data in a way that uses a provided verification cache to deduplicate // verification requests when possible. @@ -39,6 +41,12 @@ func FromDataCached( ) ([]detectors.Result, error) { if verificationCache == nil { + start := time.Now() + defer func() { + if verify { + VerificationTimeSpentMS.Add(time.Since(start).Milliseconds()) + } + }() return detector.FromData(ctx, verify, data) } @@ -74,7 +82,11 @@ func FromDataCached( } } + start := time.Now() withRemoteVerification, err := detector.FromData(ctx, verify, data) + if verify { + VerificationTimeSpentMS.Add(time.Since(start).Milliseconds()) + } if err != nil { return nil, err } From 48d953560a510f5692acbd8a7a3b59b4efe78cea Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Wed, 18 Dec 2024 16:43:53 -0800 Subject: [PATCH 28/43] create struct --- pkg/engine/engine.go | 20 ++-- pkg/verificationcaching/verification_cache.go | 106 ++++++++++++++++++ ...ing_test.go => verification_cache_test.go} | 70 ++++++------ .../verificationcaching.go | 103 ----------------- 4 files changed, 149 insertions(+), 150 deletions(-) create mode 100644 pkg/verificationcaching/verification_cache.go rename pkg/verificationcaching/{verificationcaching_test.go => verification_cache_test.go} (72%) delete mode 100644 pkg/verificationcaching/verificationcaching.go diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 591285345850..a55df1c2d52b 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -148,7 +148,7 @@ type Config struct { // VerificationOverlapWorkerMultiplier is used to determine the number of verification overlap workers to spawn. VerificationOverlapWorkerMultiplier int - VerificationCache cache.Cache[detectors.Result] + VerificationResultCache cache.Cache[detectors.Result] GetVerificationCacheKey func(result detectors.Result) string } @@ -158,13 +158,10 @@ type Config struct { // customization through various options and configurations. type Engine struct { // CLI flags. - concurrency int - decoders []decoders.Decoder - detectors []detectors.Detector - // verificationCache must be thread-safe. Leave nil to disable verification caching. - verificationCache cache.Cache[detectors.Result] - // getVerificationCacheKey must be thread-safe - getVerificationCacheKey func(result detectors.Result) string + concurrency int + decoders []decoders.Decoder + detectors []detectors.Detector + verificationCache verificationcaching.VerificationCache // Any detectors configured to override sources' verification flags detectorVerificationOverrides map[config.DetectorID]bool @@ -229,8 +226,7 @@ func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) { concurrency: cfg.Concurrency, decoders: cfg.Decoders, detectors: cfg.Detectors, - verificationCache: cfg.VerificationCache, - getVerificationCacheKey: cfg.GetVerificationCacheKey, + verificationCache: verificationcaching.New(cfg.VerificationResultCache, cfg.GetVerificationCacheKey), dispatcher: cfg.Dispatcher, verify: cfg.Verify, filterUnverified: cfg.FilterUnverified, @@ -1067,10 +1063,8 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) { t := time.AfterFunc(detectionTimeout+1*time.Second, func() { ctx.Logger().Error(nil, "a detector ignored the context timeout") }) - results, err := verificationcaching.FromDataCached( + results, err := e.verificationCache.FromData( ctx, - e.verificationCache, - e.getVerificationCacheKey, data.detector.Detector, data.chunk.Verify, data.chunk.SecretID != 0, diff --git a/pkg/verificationcaching/verification_cache.go b/pkg/verificationcaching/verification_cache.go new file mode 100644 index 000000000000..659fb9a7647f --- /dev/null +++ b/pkg/verificationcaching/verification_cache.go @@ -0,0 +1,106 @@ +package verificationcaching + +import ( + "context" + "sync/atomic" + "time" + + "github.com/trufflesecurity/trufflehog/v3/pkg/cache" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" +) + +type VerificationCache struct { + getResultCacheKey func(result detectors.Result) string + resultCache cache.Cache[detectors.Result] + + Metrics VerificationCacheMetrics +} + +type VerificationCacheMetrics struct { + CredentialVerificationsSaved atomic.Int32 + FromDataVerifyTimeSpentMS atomic.Int64 + ResultCacheHits atomic.Int32 + ResultCacheHitsWasted atomic.Int32 + ResultCacheMisses atomic.Int32 +} + +func New( + resultCache cache.Cache[detectors.Result], + getResultCacheKey func(result detectors.Result) string, +) VerificationCache { + return VerificationCache{ + getResultCacheKey: getResultCacheKey, + resultCache: resultCache, + } +} + +func (v *VerificationCache) FromData( + ctx context.Context, + detector detectors.Detector, + verify bool, + forceCacheUpdate bool, + data []byte, +) ([]detectors.Result, error) { + + if v.resultCache == nil { + if verify { + start := time.Now() + defer func() { + v.Metrics.FromDataVerifyTimeSpentMS.Add(time.Since(start).Milliseconds()) + }() + } + + return detector.FromData(ctx, verify, data) + } + + if !forceCacheUpdate { + withoutRemoteVerification, err := detector.FromData(ctx, false, data) + if err != nil { + return nil, err + } + + if !verify { + return withoutRemoteVerification, nil + } + + isEverythingCached := true + var cacheHitsInCurrentChunk int32 + for i, r := range withoutRemoteVerification { + if cacheHit, ok := v.resultCache.Get(v.getResultCacheKey(r)); ok { + withoutRemoteVerification[i].CopyVerificationInfo(&cacheHit) + withoutRemoteVerification[i].VerificationFromCache = true + v.Metrics.ResultCacheHits.Add(1) + cacheHitsInCurrentChunk++ + } else { + v.Metrics.ResultCacheMisses.Add(1) + isEverythingCached = false + v.Metrics.ResultCacheHitsWasted.Add(cacheHitsInCurrentChunk) + break + } + } + + if isEverythingCached { + v.Metrics.CredentialVerificationsSaved.Add(int32(len(withoutRemoteVerification))) + return withoutRemoteVerification, nil + } + } + + start := time.Now() + withRemoteVerification, err := detector.FromData(ctx, verify, data) + if verify { + v.Metrics.FromDataVerifyTimeSpentMS.Add(time.Since(start).Milliseconds()) + } + if err != nil { + return nil, err + } + + for _, r := range withRemoteVerification { + copyForCaching := r + // Do not persist raw secret values in a long-lived cache + copyForCaching.Raw = nil + copyForCaching.RawV2 = nil + v.resultCache.Set(v.getResultCacheKey(r), copyForCaching) + } + + return withRemoteVerification, nil +} diff --git a/pkg/verificationcaching/verificationcaching_test.go b/pkg/verificationcaching/verification_cache_test.go similarity index 72% rename from pkg/verificationcaching/verificationcaching_test.go rename to pkg/verificationcaching/verification_cache_test.go index d7bb09c73a20..343d728d997f 100644 --- a/pkg/verificationcaching/verificationcaching_test.go +++ b/pkg/verificationcaching/verification_cache_test.go @@ -41,16 +41,15 @@ func getCacheKey(result detectors.Result) string { return string(result.Raw) } -func TestFromDataCached_NilCache(t *testing.T) { +func TestVerificationCacheFromData_Passthrough(t *testing.T) { detector := testDetector{results: []detectors.Result{ {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: true}, }} require.NotPanics(t, func() { - results, err := FromDataCached( + cache := New(nil, func(result detectors.Result) string { panic("shouldn't happen") }) + results, err := cache.FromData( logContext.Background(), - nil, - func(result detectors.Result) string { panic("shouldn't happen") }, &detector, true, true, @@ -59,19 +58,18 @@ func TestFromDataCached_NilCache(t *testing.T) { require.NoError(t, err) assert.Equal(t, 1, detector.fromDataCallCount) assert.ElementsMatch(t, detector.results, results) + assert.Equal(t, VerificationCacheMetrics{}, cache.Metrics) }) } -func TestFromDataCached_VerifyFalseForceCacheUpdateFalse(t *testing.T) { +func TestVerificationCacheFromData_VerifyFalseForceCacheUpdateFalse(t *testing.T) { detector := testDetector{results: []detectors.Result{ {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: true}, }} - cache := simple.NewCache[detectors.Result]() + cache := New(simple.NewCache[detectors.Result](), getCacheKey) - results, err := FromDataCached( + results, err := cache.FromData( logContext.Background(), - cache, - getCacheKey, &detector, false, false, @@ -82,7 +80,8 @@ func TestFromDataCached_VerifyFalseForceCacheUpdateFalse(t *testing.T) { assert.ElementsMatch(t, []detectors.Result{ {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: false}, }, results) - assert.Empty(t, cache.Values()) + assert.Empty(t, cache.resultCache.Values()) + assert.Equal(t, VerificationCacheMetrics{}, cache.Metrics) } func TestFromDataCached_VerifyFalseForceCacheUpdateTrue(t *testing.T) { @@ -91,12 +90,10 @@ func TestFromDataCached_VerifyFalseForceCacheUpdateTrue(t *testing.T) { {Redacted: "world", Raw: []byte("world"), RawV2: []byte("worldV2"), Verified: false}, }} detector.results[1].SetVerificationError(errors.New("test verification error")) - cache := simple.NewCache[detectors.Result]() + cache := New(simple.NewCache[detectors.Result](), getCacheKey) - results, err := FromDataCached( + results, err := cache.FromData( logContext.Background(), - cache, - getCacheKey, &detector, false, true, @@ -111,7 +108,8 @@ func TestFromDataCached_VerifyFalseForceCacheUpdateTrue(t *testing.T) { assert.ElementsMatch(t, []detectors.Result{ {Redacted: "hello", Verified: false}, {Redacted: "world", Verified: false}, - }, cache.Values()) + }, cache.resultCache.Values()) + assert.Equal(t, VerificationCacheMetrics{}, cache.Metrics) } func TestFromDataCached_VerifyTrueForceCacheUpdateFalseAllCacheHits(t *testing.T) { @@ -126,14 +124,12 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateFalseAllCacheHits(t *testing.T {Redacted: "world", Verified: true}, } cacheData[0].SetVerificationError(errors.New("test verification error")) - cache := simple.NewCache[detectors.Result]() - cache.Set("hello", cacheData[0]) - cache.Set("world", cacheData[1]) + cache := New(simple.NewCache[detectors.Result](), getCacheKey) + cache.resultCache.Set("hello", cacheData[0]) + cache.resultCache.Set("world", cacheData[1]) - results, err := FromDataCached( + results, err := cache.FromData( logContext.Background(), - cache, - getCacheKey, &detector, true, false, @@ -147,7 +143,11 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateFalseAllCacheHits(t *testing.T } wantResults[0].SetVerificationError(errors.New("test verification error")) assert.ElementsMatch(t, wantResults, results) - assert.ElementsMatch(t, cacheData, cache.Values()) + assert.ElementsMatch(t, cacheData, cache.resultCache.Values()) + assert.Equal(t, int32(2), cache.Metrics.CredentialVerificationsSaved.Load()) + assert.Equal(t, int32(2), cache.Metrics.ResultCacheHits.Load()) + assert.Equal(t, int32(0), cache.Metrics.ResultCacheHitsWasted.Load()) + assert.Equal(t, int32(0), cache.Metrics.ResultCacheMisses.Load()) } func TestFromDataCached_VerifyTrueForceCacheUpdateFalseCacheMiss(t *testing.T) { @@ -160,12 +160,11 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateFalseCacheMiss(t *testing.T) { {Redacted: "hello", Verified: false}, } cacheData[0].SetVerificationError(errors.New("test verification error")) - cache := simple.NewCacheWithData([]simple.CacheEntry[detectors.Result]{{Key: "hello", Value: cacheData[0]}}) + resultCache := simple.NewCacheWithData([]simple.CacheEntry[detectors.Result]{{Key: "hello", Value: cacheData[0]}}) + cache := New(resultCache, getCacheKey) - results, err := FromDataCached( + results, err := cache.FromData( logContext.Background(), - cache, - getCacheKey, &detector, true, false, @@ -179,7 +178,11 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateFalseCacheMiss(t *testing.T) { {Redacted: "world", Verified: false}, } wantCacheData[1].SetVerificationError(errors.New("test verification error")) - assert.ElementsMatch(t, wantCacheData, cache.Values()) + assert.ElementsMatch(t, wantCacheData, cache.resultCache.Values()) + assert.Equal(t, int32(0), cache.Metrics.CredentialVerificationsSaved.Load()) + assert.Equal(t, int32(1), cache.Metrics.ResultCacheHits.Load()) + assert.Equal(t, int32(1), cache.Metrics.ResultCacheMisses.Load()) + assert.Equal(t, int32(1), cache.Metrics.ResultCacheHitsWasted.Load()) } func TestFromDataCached_VerifyTrueForceCacheUpdateTrue(t *testing.T) { @@ -188,14 +191,12 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateTrue(t *testing.T) { {Redacted: "world", Raw: []byte("world"), RawV2: []byte("worldV2"), Verified: false}, }} detector.results[1].SetVerificationError(errors.New("test verification error")) - cache := simple.NewCache[detectors.Result]() - cache.Set("hello", detectors.Result{Redacted: "hello", Verified: false}) - cache.Set("world", detectors.Result{Redacted: "world", Verified: true}) + cache := New(simple.NewCache[detectors.Result](), getCacheKey) + cache.resultCache.Set("hello", detectors.Result{Redacted: "hello", Verified: false}) + cache.resultCache.Set("world", detectors.Result{Redacted: "world", Verified: true}) - results, err := FromDataCached( + results, err := cache.FromData( logContext.Background(), - cache, - getCacheKey, &detector, true, true, @@ -209,5 +210,6 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateTrue(t *testing.T) { {Redacted: "world", Verified: false}, } wantCacheData[1].SetVerificationError(errors.New("test verification error")) - assert.ElementsMatch(t, wantCacheData, cache.Values()) + assert.ElementsMatch(t, wantCacheData, cache.resultCache.Values()) + assert.Equal(t, VerificationCacheMetrics{}, cache.Metrics) } diff --git a/pkg/verificationcaching/verificationcaching.go b/pkg/verificationcaching/verificationcaching.go deleted file mode 100644 index a9724599c939..000000000000 --- a/pkg/verificationcaching/verificationcaching.go +++ /dev/null @@ -1,103 +0,0 @@ -package verificationcaching - -import ( - "sync/atomic" - "time" - - "github.com/trufflesecurity/trufflehog/v3/pkg/cache" - "github.com/trufflesecurity/trufflehog/v3/pkg/context" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" -) - -var CacheHits atomic.Int32 -var CacheHitsWasted atomic.Int32 -var CacheMisses atomic.Int32 -var VerificationCallsSaved atomic.Int32 -var VerificationTimeSpentMS atomic.Int64 - -// FromDataCached executes detection on chunk data in a way that uses a provided verification cache to deduplicate -// verification requests when possible. -// -// If the provided cache is nil, this function simply returns the result of the provided detector's FromData method. -// -// If verify is false, this function returns the result of the provided detector's FromData method. In this case, the -// cache is only updated if forceCacheUpdate is true. -// -// If verify is true, and forceCacheUpdate is false, this function first executes the provided detector's FromData -// method with verification disabled. Then, the cache is queried for each result. If they are all present in the cache, -// the cached values are returned. Otherwise, the provided detector's FromData method is invoked (again) with -// verification enabled, and the results are used to update the cache before being returned. -// -// If verify is true, and forceCacheUpdate is also true, the provided detector's FromData method is invoked, and the -// results are used to update the cache before being returned. -func FromDataCached( - ctx context.Context, - verificationCache cache.Cache[detectors.Result], - getCacheKey func(result detectors.Result) string, - detector detectors.Detector, - verify bool, - forceCacheUpdate bool, - data []byte, -) ([]detectors.Result, error) { - - if verificationCache == nil { - start := time.Now() - defer func() { - if verify { - VerificationTimeSpentMS.Add(time.Since(start).Milliseconds()) - } - }() - return detector.FromData(ctx, verify, data) - } - - if !forceCacheUpdate { - withoutRemoteVerification, err := detector.FromData(ctx, false, data) - if err != nil { - return nil, err - } - - if !verify { - return withoutRemoteVerification, nil - } - - isEverythingCached := true - var cacheHitsInCurrentChunk int32 - for i, r := range withoutRemoteVerification { - if cacheHit, ok := verificationCache.Get(getCacheKey(r)); ok { - withoutRemoteVerification[i].CopyVerificationInfo(&cacheHit) - withoutRemoteVerification[i].VerificationFromCache = true - CacheHits.Add(1) - cacheHitsInCurrentChunk++ - } else { - CacheMisses.Add(1) - isEverythingCached = false - CacheHitsWasted.Add(cacheHitsInCurrentChunk) - break - } - } - - if isEverythingCached { - VerificationCallsSaved.Add(int32(len(withoutRemoteVerification))) - return withoutRemoteVerification, nil - } - } - - start := time.Now() - withRemoteVerification, err := detector.FromData(ctx, verify, data) - if verify { - VerificationTimeSpentMS.Add(time.Since(start).Milliseconds()) - } - if err != nil { - return nil, err - } - - for _, r := range withRemoteVerification { - copyForCaching := r - // Do not persist raw secret values in a long-lived cache - copyForCaching.Raw = nil - copyForCaching.RawV2 = nil - verificationCache.Set(getCacheKey(r), copyForCaching) - } - - return withRemoteVerification, nil -} From 8482b4dab253c0fdd7949e0f0dbeebb9a5be8d5e Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Wed, 18 Dec 2024 17:33:29 -0800 Subject: [PATCH 29/43] rewrite metrics --- main.go | 51 ++++++++++------ pkg/engine/engine.go | 12 +++- pkg/verificationcaching/in_memory_metrics.go | 33 ++++++++++ pkg/verificationcaching/metrics_reporter.go | 9 +++ pkg/verificationcaching/verification_cache.go | 28 +++------ .../verification_cache_test.go | 61 +++++++++++++------ 6 files changed, 136 insertions(+), 58 deletions(-) create mode 100644 pkg/verificationcaching/in_memory_metrics.go create mode 100644 pkg/verificationcaching/metrics_reporter.go diff --git a/main.go b/main.go index bd7c3f421107..b2aab57238fa 100644 --- a/main.go +++ b/main.go @@ -485,29 +485,32 @@ func run(state overseer.State) { logFatal(err, "failed to configure results flag") } + verificationCacheMetrics := verificationcaching.InMemoryMetrics{} + engConf := engine.Config{ Concurrency: *concurrency, // The engine must always be configured with the list of // default detectors, which can be further filtered by the // user. The filters are applied by the engine and are only // subtractive. - Detectors: append(defaults.DefaultDetectors(), conf.Detectors...), - Verify: !*noVerification, - IncludeDetectors: *includeDetectors, - ExcludeDetectors: *excludeDetectors, - CustomVerifiersOnly: *customVerifiersOnly, - VerifierEndpoints: *verifiers, - Dispatcher: engine.NewPrinterDispatcher(printer), - FilterUnverified: *filterUnverified, - FilterEntropy: *filterEntropy, - VerificationOverlap: *allowVerificationOverlap, - Results: parsedResults, - PrintAvgDetectorTime: *printAvgDetectorTime, - ShouldScanEntireChunk: *scanEntireChunk, + Detectors: append(defaults.DefaultDetectors(), conf.Detectors...), + Verify: !*noVerification, + IncludeDetectors: *includeDetectors, + ExcludeDetectors: *excludeDetectors, + CustomVerifiersOnly: *customVerifiersOnly, + VerifierEndpoints: *verifiers, + Dispatcher: engine.NewPrinterDispatcher(printer), + FilterUnverified: *filterUnverified, + FilterEntropy: *filterEntropy, + VerificationOverlap: *allowVerificationOverlap, + Results: parsedResults, + PrintAvgDetectorTime: *printAvgDetectorTime, + ShouldScanEntireChunk: *scanEntireChunk, + VerificationCacheMetrics: &verificationCacheMetrics, } if !*noVerificationCache { - engConf.VerificationCache = simple.NewCache[detectors.Result]() + engConf.VerificationResultCache = simple.NewCache[detectors.Result]() engConf.GetVerificationCacheKey = func(result detectors.Result) string { return string(result.Raw) + string(result.RawV2) } } @@ -528,6 +531,20 @@ func run(state overseer.State) { logFatal(err, "error running scan") } + verificationCacheMetrics := struct { + Hits int32 + Misses int32 + HitsWasted int32 + AttemptsSaved int32 + VerificationTimeSpentMS int64 + }{ + Hits: verificationCacheMetrics.ResultCacheHits.Load(), + Misses: verificationCacheMetrics.ResultCacheMisses.Load(), + HitsWasted: verificationCacheMetrics.ResultCacheHitsWasted.Load(), + AttemptsSaved: verificationCacheMetrics.CredentialVerificationsSaved.Load(), + VerificationTimeSpentMS: verificationCacheMetrics.FromDataVerifyTimeSpentMS.Load(), + } + // Print results. logger.Info("finished scanning", "chunks", metrics.ChunksScanned, @@ -536,11 +553,7 @@ func run(state overseer.State) { "unverified_secrets", metrics.UnverifiedSecretsFound, "scan_duration", metrics.ScanDuration.String(), "trufflehog_version", version.BuildVersion, - "verification_cache_hits", verificationcaching.CacheHits.Load(), - "verification_cache_misses", verificationcaching.CacheMisses.Load(), - "verification_cache_hits_wasted", verificationcaching.CacheHitsWasted.Load(), - "verification_cache_calls_saved", verificationcaching.VerificationCallsSaved.Load(), - "verification_time_spent", verificationcaching.VerificationTimeSpentMS.Load(), + "verification_caching", verificationCacheMetrics, ) if metrics.hasFoundResults && *fail { diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index a55df1c2d52b..77996b08e5bb 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -148,8 +148,9 @@ type Config struct { // VerificationOverlapWorkerMultiplier is used to determine the number of verification overlap workers to spawn. VerificationOverlapWorkerMultiplier int - VerificationResultCache cache.Cache[detectors.Result] - GetVerificationCacheKey func(result detectors.Result) string + VerificationResultCache cache.Cache[detectors.Result] + GetVerificationCacheKey func(result detectors.Result) string + VerificationCacheMetrics verificationcaching.MetricsReporter } // Engine represents the core scanning engine responsible for detecting secrets in input data. @@ -222,11 +223,16 @@ type Engine struct { // NewEngine creates a new Engine instance with the provided configuration. func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) { + verificationCache := verificationcaching.New( + cfg.VerificationResultCache, + cfg.GetVerificationCacheKey, + cfg.VerificationCacheMetrics) + engine := &Engine{ concurrency: cfg.Concurrency, decoders: cfg.Decoders, detectors: cfg.Detectors, - verificationCache: verificationcaching.New(cfg.VerificationResultCache, cfg.GetVerificationCacheKey), + verificationCache: verificationCache, dispatcher: cfg.Dispatcher, verify: cfg.Verify, filterUnverified: cfg.FilterUnverified, diff --git a/pkg/verificationcaching/in_memory_metrics.go b/pkg/verificationcaching/in_memory_metrics.go new file mode 100644 index 000000000000..24cd1b3a4f05 --- /dev/null +++ b/pkg/verificationcaching/in_memory_metrics.go @@ -0,0 +1,33 @@ +package verificationcaching + +import "sync/atomic" + +type InMemoryMetrics struct { + CredentialVerificationsSaved atomic.Int32 + FromDataVerifyTimeSpentMS atomic.Int64 + ResultCacheHits atomic.Int32 + ResultCacheHitsWasted atomic.Int32 + ResultCacheMisses atomic.Int32 +} + +var _ MetricsReporter = (*InMemoryMetrics)(nil) + +func (m *InMemoryMetrics) AddCredentialVerificationsSaved(count int) { + m.CredentialVerificationsSaved.Add(int32(count)) +} + +func (m *InMemoryMetrics) AddFromDataVerifyTimeSpent(ms int64) { + m.FromDataVerifyTimeSpentMS.Add(ms) +} + +func (m *InMemoryMetrics) AddResultCacheHits(count int) { + m.ResultCacheHits.Add(int32(count)) +} + +func (m *InMemoryMetrics) AddResultCacheMisses(count int) { + m.ResultCacheMisses.Add(int32(count)) +} + +func (m *InMemoryMetrics) AddResultCacheHitsWasted(count int) { + m.ResultCacheHitsWasted.Add(int32(count)) +} diff --git a/pkg/verificationcaching/metrics_reporter.go b/pkg/verificationcaching/metrics_reporter.go new file mode 100644 index 000000000000..b6f26868e9b2 --- /dev/null +++ b/pkg/verificationcaching/metrics_reporter.go @@ -0,0 +1,9 @@ +package verificationcaching + +type MetricsReporter interface { + AddCredentialVerificationsSaved(count int) + AddFromDataVerifyTimeSpent(ms int64) + AddResultCacheHits(count int) + AddResultCacheMisses(count int) + AddResultCacheHitsWasted(count int) +} diff --git a/pkg/verificationcaching/verification_cache.go b/pkg/verificationcaching/verification_cache.go index 659fb9a7647f..8a36f1418563 100644 --- a/pkg/verificationcaching/verification_cache.go +++ b/pkg/verificationcaching/verification_cache.go @@ -2,7 +2,6 @@ package verificationcaching import ( "context" - "sync/atomic" "time" "github.com/trufflesecurity/trufflehog/v3/pkg/cache" @@ -11,25 +10,18 @@ import ( type VerificationCache struct { getResultCacheKey func(result detectors.Result) string + metrics MetricsReporter resultCache cache.Cache[detectors.Result] - - Metrics VerificationCacheMetrics -} - -type VerificationCacheMetrics struct { - CredentialVerificationsSaved atomic.Int32 - FromDataVerifyTimeSpentMS atomic.Int64 - ResultCacheHits atomic.Int32 - ResultCacheHitsWasted atomic.Int32 - ResultCacheMisses atomic.Int32 } func New( resultCache cache.Cache[detectors.Result], getResultCacheKey func(result detectors.Result) string, + metrics MetricsReporter, ) VerificationCache { return VerificationCache{ getResultCacheKey: getResultCacheKey, + metrics: metrics, resultCache: resultCache, } } @@ -46,7 +38,7 @@ func (v *VerificationCache) FromData( if verify { start := time.Now() defer func() { - v.Metrics.FromDataVerifyTimeSpentMS.Add(time.Since(start).Milliseconds()) + v.metrics.AddFromDataVerifyTimeSpent(time.Since(start).Milliseconds()) }() } @@ -64,23 +56,23 @@ func (v *VerificationCache) FromData( } isEverythingCached := true - var cacheHitsInCurrentChunk int32 + var cacheHitsInCurrentChunk int for i, r := range withoutRemoteVerification { if cacheHit, ok := v.resultCache.Get(v.getResultCacheKey(r)); ok { withoutRemoteVerification[i].CopyVerificationInfo(&cacheHit) withoutRemoteVerification[i].VerificationFromCache = true - v.Metrics.ResultCacheHits.Add(1) + v.metrics.AddResultCacheHits(1) cacheHitsInCurrentChunk++ } else { - v.Metrics.ResultCacheMisses.Add(1) + v.metrics.AddResultCacheMisses(1) isEverythingCached = false - v.Metrics.ResultCacheHitsWasted.Add(cacheHitsInCurrentChunk) + v.metrics.AddResultCacheHitsWasted(cacheHitsInCurrentChunk) break } } if isEverythingCached { - v.Metrics.CredentialVerificationsSaved.Add(int32(len(withoutRemoteVerification))) + v.metrics.AddCredentialVerificationsSaved(len(withoutRemoteVerification)) return withoutRemoteVerification, nil } } @@ -88,7 +80,7 @@ func (v *VerificationCache) FromData( start := time.Now() withRemoteVerification, err := detector.FromData(ctx, verify, data) if verify { - v.Metrics.FromDataVerifyTimeSpentMS.Add(time.Since(start).Milliseconds()) + v.metrics.AddFromDataVerifyTimeSpent(time.Since(start).Milliseconds()) } if err != nil { return nil, err diff --git a/pkg/verificationcaching/verification_cache_test.go b/pkg/verificationcaching/verification_cache_test.go index 343d728d997f..febf39f6541d 100644 --- a/pkg/verificationcaching/verification_cache_test.go +++ b/pkg/verificationcaching/verification_cache_test.go @@ -4,6 +4,7 @@ import ( "context" "errors" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -28,6 +29,11 @@ func (t *testDetector) FromData(_ context.Context, verify bool, _ []byte) ([]det } results = append(results, copy) } + + // The metric timing resolution is 1 ms, so the detector needs to artificially slow down so that it can actually be + // monitored. + time.Sleep(2 * time.Millisecond) + return results, nil } @@ -46,8 +52,12 @@ func TestVerificationCacheFromData_Passthrough(t *testing.T) { {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: true}, }} + metrics := InMemoryMetrics{} require.NotPanics(t, func() { - cache := New(nil, func(result detectors.Result) string { panic("shouldn't happen") }) + cache := New( + nil, + func(result detectors.Result) string { panic("shouldn't happen") }, + &metrics) results, err := cache.FromData( logContext.Background(), &detector, @@ -58,7 +68,11 @@ func TestVerificationCacheFromData_Passthrough(t *testing.T) { require.NoError(t, err) assert.Equal(t, 1, detector.fromDataCallCount) assert.ElementsMatch(t, detector.results, results) - assert.Equal(t, VerificationCacheMetrics{}, cache.Metrics) + assert.Less(t, int64(0), metrics.FromDataVerifyTimeSpentMS.Load()) + assert.Equal(t, int32(0), metrics.CredentialVerificationsSaved.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheHits.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheMisses.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheHitsWasted.Load()) }) } @@ -66,7 +80,8 @@ func TestVerificationCacheFromData_VerifyFalseForceCacheUpdateFalse(t *testing.T detector := testDetector{results: []detectors.Result{ {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: true}, }} - cache := New(simple.NewCache[detectors.Result](), getCacheKey) + metrics := InMemoryMetrics{} + cache := New(simple.NewCache[detectors.Result](), getCacheKey, &metrics) results, err := cache.FromData( logContext.Background(), @@ -81,7 +96,7 @@ func TestVerificationCacheFromData_VerifyFalseForceCacheUpdateFalse(t *testing.T {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: false}, }, results) assert.Empty(t, cache.resultCache.Values()) - assert.Equal(t, VerificationCacheMetrics{}, cache.Metrics) + assert.Equal(t, InMemoryMetrics{}, metrics) } func TestFromDataCached_VerifyFalseForceCacheUpdateTrue(t *testing.T) { @@ -90,7 +105,8 @@ func TestFromDataCached_VerifyFalseForceCacheUpdateTrue(t *testing.T) { {Redacted: "world", Raw: []byte("world"), RawV2: []byte("worldV2"), Verified: false}, }} detector.results[1].SetVerificationError(errors.New("test verification error")) - cache := New(simple.NewCache[detectors.Result](), getCacheKey) + metrics := InMemoryMetrics{} + cache := New(simple.NewCache[detectors.Result](), getCacheKey, &metrics) results, err := cache.FromData( logContext.Background(), @@ -109,7 +125,7 @@ func TestFromDataCached_VerifyFalseForceCacheUpdateTrue(t *testing.T) { {Redacted: "hello", Verified: false}, {Redacted: "world", Verified: false}, }, cache.resultCache.Values()) - assert.Equal(t, VerificationCacheMetrics{}, cache.Metrics) + assert.Equal(t, InMemoryMetrics{}, metrics) } func TestFromDataCached_VerifyTrueForceCacheUpdateFalseAllCacheHits(t *testing.T) { @@ -124,7 +140,8 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateFalseAllCacheHits(t *testing.T {Redacted: "world", Verified: true}, } cacheData[0].SetVerificationError(errors.New("test verification error")) - cache := New(simple.NewCache[detectors.Result](), getCacheKey) + metrics := InMemoryMetrics{} + cache := New(simple.NewCache[detectors.Result](), getCacheKey, &metrics) cache.resultCache.Set("hello", cacheData[0]) cache.resultCache.Set("world", cacheData[1]) @@ -144,10 +161,11 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateFalseAllCacheHits(t *testing.T wantResults[0].SetVerificationError(errors.New("test verification error")) assert.ElementsMatch(t, wantResults, results) assert.ElementsMatch(t, cacheData, cache.resultCache.Values()) - assert.Equal(t, int32(2), cache.Metrics.CredentialVerificationsSaved.Load()) - assert.Equal(t, int32(2), cache.Metrics.ResultCacheHits.Load()) - assert.Equal(t, int32(0), cache.Metrics.ResultCacheHitsWasted.Load()) - assert.Equal(t, int32(0), cache.Metrics.ResultCacheMisses.Load()) + assert.Equal(t, int64(0), metrics.FromDataVerifyTimeSpentMS.Load()) + assert.Equal(t, int32(2), metrics.CredentialVerificationsSaved.Load()) + assert.Equal(t, int32(2), metrics.ResultCacheHits.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheHitsWasted.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheMisses.Load()) } func TestFromDataCached_VerifyTrueForceCacheUpdateFalseCacheMiss(t *testing.T) { @@ -161,7 +179,8 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateFalseCacheMiss(t *testing.T) { } cacheData[0].SetVerificationError(errors.New("test verification error")) resultCache := simple.NewCacheWithData([]simple.CacheEntry[detectors.Result]{{Key: "hello", Value: cacheData[0]}}) - cache := New(resultCache, getCacheKey) + metrics := InMemoryMetrics{} + cache := New(resultCache, getCacheKey, &metrics) results, err := cache.FromData( logContext.Background(), @@ -179,10 +198,11 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateFalseCacheMiss(t *testing.T) { } wantCacheData[1].SetVerificationError(errors.New("test verification error")) assert.ElementsMatch(t, wantCacheData, cache.resultCache.Values()) - assert.Equal(t, int32(0), cache.Metrics.CredentialVerificationsSaved.Load()) - assert.Equal(t, int32(1), cache.Metrics.ResultCacheHits.Load()) - assert.Equal(t, int32(1), cache.Metrics.ResultCacheMisses.Load()) - assert.Equal(t, int32(1), cache.Metrics.ResultCacheHitsWasted.Load()) + assert.Less(t, int64(0), metrics.FromDataVerifyTimeSpentMS.Load()) + assert.Equal(t, int32(0), metrics.CredentialVerificationsSaved.Load()) + assert.Equal(t, int32(1), metrics.ResultCacheHits.Load()) + assert.Equal(t, int32(1), metrics.ResultCacheMisses.Load()) + assert.Equal(t, int32(1), metrics.ResultCacheHitsWasted.Load()) } func TestFromDataCached_VerifyTrueForceCacheUpdateTrue(t *testing.T) { @@ -191,7 +211,8 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateTrue(t *testing.T) { {Redacted: "world", Raw: []byte("world"), RawV2: []byte("worldV2"), Verified: false}, }} detector.results[1].SetVerificationError(errors.New("test verification error")) - cache := New(simple.NewCache[detectors.Result](), getCacheKey) + metrics := InMemoryMetrics{} + cache := New(simple.NewCache[detectors.Result](), getCacheKey, &metrics) cache.resultCache.Set("hello", detectors.Result{Redacted: "hello", Verified: false}) cache.resultCache.Set("world", detectors.Result{Redacted: "world", Verified: true}) @@ -211,5 +232,9 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateTrue(t *testing.T) { } wantCacheData[1].SetVerificationError(errors.New("test verification error")) assert.ElementsMatch(t, wantCacheData, cache.resultCache.Values()) - assert.Equal(t, VerificationCacheMetrics{}, cache.Metrics) + assert.Less(t, int64(0), metrics.FromDataVerifyTimeSpentMS.Load()) + assert.Equal(t, int32(0), metrics.CredentialVerificationsSaved.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheHits.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheMisses.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheHitsWasted.Load()) } From 2d21f7b13e2b74bd06d9df779aca59f4abfd869b Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 10:50:53 -0800 Subject: [PATCH 30/43] create ResultCache type --- pkg/verificationcaching/metrics_reporter.go | 1 + pkg/verificationcaching/result_cache.go | 9 +++++++++ pkg/verificationcaching/verification_cache.go | 5 ++--- 3 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 pkg/verificationcaching/result_cache.go diff --git a/pkg/verificationcaching/metrics_reporter.go b/pkg/verificationcaching/metrics_reporter.go index b6f26868e9b2..2a10e3de0627 100644 --- a/pkg/verificationcaching/metrics_reporter.go +++ b/pkg/verificationcaching/metrics_reporter.go @@ -1,5 +1,6 @@ package verificationcaching +// MetricsReporter is an interface used by a verification cache to report various metrics on its operation. type MetricsReporter interface { AddCredentialVerificationsSaved(count int) AddFromDataVerifyTimeSpent(ms int64) diff --git a/pkg/verificationcaching/result_cache.go b/pkg/verificationcaching/result_cache.go new file mode 100644 index 000000000000..15bc9f931071 --- /dev/null +++ b/pkg/verificationcaching/result_cache.go @@ -0,0 +1,9 @@ +package verificationcaching + +import ( + "github.com/trufflesecurity/trufflehog/v3/pkg/cache" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" +) + +// ResultCache is a cache that holds individual detector results. +type ResultCache cache.Cache[detectors.Result] diff --git a/pkg/verificationcaching/verification_cache.go b/pkg/verificationcaching/verification_cache.go index 8a36f1418563..6d7dd108e223 100644 --- a/pkg/verificationcaching/verification_cache.go +++ b/pkg/verificationcaching/verification_cache.go @@ -4,18 +4,17 @@ import ( "context" "time" - "github.com/trufflesecurity/trufflehog/v3/pkg/cache" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" ) type VerificationCache struct { getResultCacheKey func(result detectors.Result) string metrics MetricsReporter - resultCache cache.Cache[detectors.Result] + resultCache ResultCache } func New( - resultCache cache.Cache[detectors.Result], + resultCache ResultCache, getResultCacheKey func(result detectors.Result) string, metrics MetricsReporter, ) VerificationCache { From c7390d4c1253226f04e9ec68fa61555bc6be31cb Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 11:30:25 -0800 Subject: [PATCH 31/43] use blake2b instead of configurable cache key --- main.go | 1 - pkg/engine/engine.go | 5 +-- pkg/verificationcaching/verification_cache.go | 45 ++++++++++++++----- .../verification_cache_test.go | 38 ++++++++-------- 4 files changed, 54 insertions(+), 35 deletions(-) diff --git a/main.go b/main.go index b2aab57238fa..211592045080 100644 --- a/main.go +++ b/main.go @@ -511,7 +511,6 @@ func run(state overseer.State) { if !*noVerificationCache { engConf.VerificationResultCache = simple.NewCache[detectors.Result]() - engConf.GetVerificationCacheKey = func(result detectors.Result) string { return string(result.Raw) + string(result.RawV2) } } if *compareDetectionStrategies { diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 77996b08e5bb..b220a533cfad 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -13,7 +13,6 @@ import ( "github.com/adrg/strutil" "github.com/adrg/strutil/metrics" lru "github.com/hashicorp/golang-lru/v2" - "github.com/trufflesecurity/trufflehog/v3/pkg/cache" "github.com/trufflesecurity/trufflehog/v3/pkg/verificationcaching" "google.golang.org/protobuf/proto" @@ -148,8 +147,7 @@ type Config struct { // VerificationOverlapWorkerMultiplier is used to determine the number of verification overlap workers to spawn. VerificationOverlapWorkerMultiplier int - VerificationResultCache cache.Cache[detectors.Result] - GetVerificationCacheKey func(result detectors.Result) string + VerificationResultCache verificationcaching.ResultCache VerificationCacheMetrics verificationcaching.MetricsReporter } @@ -225,7 +223,6 @@ type Engine struct { func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) { verificationCache := verificationcaching.New( cfg.VerificationResultCache, - cfg.GetVerificationCacheKey, cfg.VerificationCacheMetrics) engine := &Engine{ diff --git a/pkg/verificationcaching/verification_cache.go b/pkg/verificationcaching/verification_cache.go index 6d7dd108e223..9953328d89d0 100644 --- a/pkg/verificationcaching/verification_cache.go +++ b/pkg/verificationcaching/verification_cache.go @@ -1,27 +1,30 @@ package verificationcaching import ( - "context" + "sync" "time" + "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/hasher" ) type VerificationCache struct { - getResultCacheKey func(result detectors.Result) string - metrics MetricsReporter - resultCache ResultCache + metrics MetricsReporter + resultCache ResultCache + + hashMu sync.Mutex + hasher hasher.Hasher } func New( resultCache ResultCache, - getResultCacheKey func(result detectors.Result) string, metrics MetricsReporter, ) VerificationCache { return VerificationCache{ - getResultCacheKey: getResultCacheKey, - metrics: metrics, - resultCache: resultCache, + metrics: metrics, + resultCache: resultCache, + hasher: hasher.NewBlake2B(), } } @@ -57,7 +60,15 @@ func (v *VerificationCache) FromData( isEverythingCached := true var cacheHitsInCurrentChunk int for i, r := range withoutRemoteVerification { - if cacheHit, ok := v.resultCache.Get(v.getResultCacheKey(r)); ok { + cacheKey, err := v.getResultCacheKey(r) + if err != nil { + ctx.Logger().Error(err, "error getting result cache key for verification caching", + "operation", "read") + isEverythingCached = false + v.metrics.AddResultCacheHitsWasted(cacheHitsInCurrentChunk) + break + } + if cacheHit, ok := v.resultCache.Get(string(cacheKey)); ok { withoutRemoteVerification[i].CopyVerificationInfo(&cacheHit) withoutRemoteVerification[i].VerificationFromCache = true v.metrics.AddResultCacheHits(1) @@ -86,12 +97,26 @@ func (v *VerificationCache) FromData( } for _, r := range withRemoteVerification { + cacheKey, err := v.getResultCacheKey(r) + if err != nil { + ctx.Logger().Error(err, "error getting result cache key for verification caching", + "operation", "write") + continue + } + copyForCaching := r // Do not persist raw secret values in a long-lived cache copyForCaching.Raw = nil copyForCaching.RawV2 = nil - v.resultCache.Set(v.getResultCacheKey(r), copyForCaching) + v.resultCache.Set(string(cacheKey), copyForCaching) } return withRemoteVerification, nil } + +func (v *VerificationCache) getResultCacheKey(result detectors.Result) ([]byte, error) { + v.hashMu.Lock() + defer v.hashMu.Unlock() + + return v.hasher.Hash(append(result.Raw, result.RawV2...)) +} diff --git a/pkg/verificationcaching/verification_cache_test.go b/pkg/verificationcaching/verification_cache_test.go index febf39f6541d..cf7869be12e8 100644 --- a/pkg/verificationcaching/verification_cache_test.go +++ b/pkg/verificationcaching/verification_cache_test.go @@ -43,8 +43,10 @@ func (t *testDetector) Description() string { return "" } var _ detectors.Detector = (*testDetector)(nil) -func getCacheKey(result detectors.Result) string { - return string(result.Raw) +func getResultCacheKey(t *testing.T, cache *VerificationCache, result detectors.Result) string { + key, err := cache.getResultCacheKey(result) + require.NoError(t, err) + return string(key) } func TestVerificationCacheFromData_Passthrough(t *testing.T) { @@ -54,10 +56,7 @@ func TestVerificationCacheFromData_Passthrough(t *testing.T) { metrics := InMemoryMetrics{} require.NotPanics(t, func() { - cache := New( - nil, - func(result detectors.Result) string { panic("shouldn't happen") }, - &metrics) + cache := New(nil, &metrics) results, err := cache.FromData( logContext.Background(), &detector, @@ -81,7 +80,7 @@ func TestVerificationCacheFromData_VerifyFalseForceCacheUpdateFalse(t *testing.T {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: true}, }} metrics := InMemoryMetrics{} - cache := New(simple.NewCache[detectors.Result](), getCacheKey, &metrics) + cache := New(simple.NewCache[detectors.Result](), &metrics) results, err := cache.FromData( logContext.Background(), @@ -106,7 +105,7 @@ func TestFromDataCached_VerifyFalseForceCacheUpdateTrue(t *testing.T) { }} detector.results[1].SetVerificationError(errors.New("test verification error")) metrics := InMemoryMetrics{} - cache := New(simple.NewCache[detectors.Result](), getCacheKey, &metrics) + cache := New(simple.NewCache[detectors.Result](), &metrics) results, err := cache.FromData( logContext.Background(), @@ -141,9 +140,9 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateFalseAllCacheHits(t *testing.T } cacheData[0].SetVerificationError(errors.New("test verification error")) metrics := InMemoryMetrics{} - cache := New(simple.NewCache[detectors.Result](), getCacheKey, &metrics) - cache.resultCache.Set("hello", cacheData[0]) - cache.resultCache.Set("world", cacheData[1]) + cache := New(simple.NewCache[detectors.Result](), &metrics) + cache.resultCache.Set(getResultCacheKey(t, &cache, remoteResults[0]), cacheData[0]) + cache.resultCache.Set(getResultCacheKey(t, &cache, remoteResults[1]), cacheData[1]) results, err := cache.FromData( logContext.Background(), @@ -174,13 +173,12 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateFalseCacheMiss(t *testing.T) { {Redacted: "world", Raw: []byte("world"), RawV2: []byte("worldV2"), Verified: false}, }} detector.results[1].SetVerificationError(errors.New("test verification error")) - cacheData := []detectors.Result{ - {Redacted: "hello", Verified: false}, - } - cacheData[0].SetVerificationError(errors.New("test verification error")) - resultCache := simple.NewCacheWithData([]simple.CacheEntry[detectors.Result]{{Key: "hello", Value: cacheData[0]}}) + cachedResult := detectors.Result{Redacted: "hello", Verified: false} + cachedResult.SetVerificationError(errors.New("test verification error")) + resultCache := simple.NewCache[detectors.Result]() metrics := InMemoryMetrics{} - cache := New(resultCache, getCacheKey, &metrics) + cache := New(resultCache, &metrics) + cache.resultCache.Set(getResultCacheKey(t, &cache, detector.results[0]), cachedResult) results, err := cache.FromData( logContext.Background(), @@ -212,9 +210,9 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateTrue(t *testing.T) { }} detector.results[1].SetVerificationError(errors.New("test verification error")) metrics := InMemoryMetrics{} - cache := New(simple.NewCache[detectors.Result](), getCacheKey, &metrics) - cache.resultCache.Set("hello", detectors.Result{Redacted: "hello", Verified: false}) - cache.resultCache.Set("world", detectors.Result{Redacted: "world", Verified: true}) + cache := New(simple.NewCache[detectors.Result](), &metrics) + cache.resultCache.Set(getResultCacheKey(t, &cache, detector.results[0]), detectors.Result{Redacted: "hello", Verified: false}) + cache.resultCache.Set(getResultCacheKey(t, &cache, detector.results[1]), detectors.Result{Redacted: "world", Verified: true}) results, err := cache.FromData( logContext.Background(), From 11e7906eb68785aaec845bc32b2414a3fa189ff4 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 13:32:13 -0800 Subject: [PATCH 32/43] add comments --- pkg/verificationcaching/metrics_reporter.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pkg/verificationcaching/metrics_reporter.go b/pkg/verificationcaching/metrics_reporter.go index 2a10e3de0627..84b21a558fcb 100644 --- a/pkg/verificationcaching/metrics_reporter.go +++ b/pkg/verificationcaching/metrics_reporter.go @@ -1,10 +1,25 @@ package verificationcaching -// MetricsReporter is an interface used by a verification cache to report various metrics on its operation. +// MetricsReporter is an interface used by a verification cache to report various metrics related to its operation. type MetricsReporter interface { + // AddCredentialVerificationsSaved records "saved" verification attempts, which is when credential verification + // status is loaded from the cache instead of retrieved from a remote verification endpoint. This number might not + // match the number of cache hits due to hit "wasting"; see AddResultCacheHitsWasted for more information. AddCredentialVerificationsSaved(count int) + + // AddFromDataVerifyTimeSpent records wall time spent in calls to FromData with verify = true. AddFromDataVerifyTimeSpent(ms int64) + + // AddResultCacheHits records result cache hits. Not all cache hits result in elided remote verification requests + // due to cache hit "wasting"; see AddResultCacheHitsWasted for more information. AddResultCacheHits(count int) + + // AddResultCacheMisses records result cache misses. AddResultCacheMisses(count int) + + // AddResultCacheHitsWasted records "wasted" result cache hits. A "wasted" result cache hit is a result cache hit + // that does not elide a remote verification request because there are other secret findings in the relevant chunk + // that are not cached. When this happens, the detector's FromData method must be called anyway, so the cache hit + // doesn't save any remote requests. AddResultCacheHitsWasted(count int) } From 7040b4df0ad8a6b39e83381fed4c79a48b825884 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 13:42:14 -0800 Subject: [PATCH 33/43] report durations --- pkg/verificationcaching/in_memory_metrics.go | 9 ++++++--- pkg/verificationcaching/metrics_reporter.go | 4 +++- pkg/verificationcaching/verification_cache.go | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pkg/verificationcaching/in_memory_metrics.go b/pkg/verificationcaching/in_memory_metrics.go index 24cd1b3a4f05..0fe05affa980 100644 --- a/pkg/verificationcaching/in_memory_metrics.go +++ b/pkg/verificationcaching/in_memory_metrics.go @@ -1,6 +1,9 @@ package verificationcaching -import "sync/atomic" +import ( + "sync/atomic" + "time" +) type InMemoryMetrics struct { CredentialVerificationsSaved atomic.Int32 @@ -16,8 +19,8 @@ func (m *InMemoryMetrics) AddCredentialVerificationsSaved(count int) { m.CredentialVerificationsSaved.Add(int32(count)) } -func (m *InMemoryMetrics) AddFromDataVerifyTimeSpent(ms int64) { - m.FromDataVerifyTimeSpentMS.Add(ms) +func (m *InMemoryMetrics) AddFromDataVerifyTimeSpent(wallTime time.Duration) { + m.FromDataVerifyTimeSpentMS.Add(wallTime.Milliseconds()) } func (m *InMemoryMetrics) AddResultCacheHits(count int) { diff --git a/pkg/verificationcaching/metrics_reporter.go b/pkg/verificationcaching/metrics_reporter.go index 84b21a558fcb..fc7a2653e999 100644 --- a/pkg/verificationcaching/metrics_reporter.go +++ b/pkg/verificationcaching/metrics_reporter.go @@ -1,5 +1,7 @@ package verificationcaching +import "time" + // MetricsReporter is an interface used by a verification cache to report various metrics related to its operation. type MetricsReporter interface { // AddCredentialVerificationsSaved records "saved" verification attempts, which is when credential verification @@ -8,7 +10,7 @@ type MetricsReporter interface { AddCredentialVerificationsSaved(count int) // AddFromDataVerifyTimeSpent records wall time spent in calls to FromData with verify = true. - AddFromDataVerifyTimeSpent(ms int64) + AddFromDataVerifyTimeSpent(wallTime time.Duration) // AddResultCacheHits records result cache hits. Not all cache hits result in elided remote verification requests // due to cache hit "wasting"; see AddResultCacheHitsWasted for more information. diff --git a/pkg/verificationcaching/verification_cache.go b/pkg/verificationcaching/verification_cache.go index 9953328d89d0..c618a9e61b1c 100644 --- a/pkg/verificationcaching/verification_cache.go +++ b/pkg/verificationcaching/verification_cache.go @@ -40,7 +40,7 @@ func (v *VerificationCache) FromData( if verify { start := time.Now() defer func() { - v.metrics.AddFromDataVerifyTimeSpent(time.Since(start).Milliseconds()) + v.metrics.AddFromDataVerifyTimeSpent(time.Since(start)) }() } @@ -90,7 +90,7 @@ func (v *VerificationCache) FromData( start := time.Now() withRemoteVerification, err := detector.FromData(ctx, verify, data) if verify { - v.metrics.AddFromDataVerifyTimeSpent(time.Since(start).Milliseconds()) + v.metrics.AddFromDataVerifyTimeSpent(time.Since(start)) } if err != nil { return nil, err From 572cd1b1482391679b9f937955dd08a256d4e3c4 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 13:49:07 -0800 Subject: [PATCH 34/43] re-indent --- pkg/verificationcaching/verification_cache.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pkg/verificationcaching/verification_cache.go b/pkg/verificationcaching/verification_cache.go index c618a9e61b1c..fcadcf29d587 100644 --- a/pkg/verificationcaching/verification_cache.go +++ b/pkg/verificationcaching/verification_cache.go @@ -17,10 +17,7 @@ type VerificationCache struct { hasher hasher.Hasher } -func New( - resultCache ResultCache, - metrics MetricsReporter, -) VerificationCache { +func New(resultCache ResultCache, metrics MetricsReporter) VerificationCache { return VerificationCache{ metrics: metrics, resultCache: resultCache, From ce19d5abaaa047392a57995c377596880a3d1648 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 13:50:11 -0800 Subject: [PATCH 35/43] rename package --- main.go | 4 ++-- pkg/engine/engine.go | 12 +++++------- .../in_memory_metrics.go | 2 +- .../metrics_reporter.go | 2 +- .../result_cache.go | 2 +- .../verification_cache.go | 2 +- .../verification_cache_test.go | 2 +- 7 files changed, 12 insertions(+), 14 deletions(-) rename pkg/{verificationcaching => verificationcache}/in_memory_metrics.go (97%) rename pkg/{verificationcaching => verificationcache}/metrics_reporter.go (98%) rename pkg/{verificationcaching => verificationcache}/result_cache.go (89%) rename pkg/{verificationcaching => verificationcache}/verification_cache.go (99%) rename pkg/{verificationcaching => verificationcache}/verification_cache_test.go (99%) diff --git a/main.go b/main.go index 211592045080..9f7a66597382 100644 --- a/main.go +++ b/main.go @@ -22,7 +22,7 @@ import ( "github.com/mattn/go-isatty" "github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/verificationcaching" + "github.com/trufflesecurity/trufflehog/v3/pkg/verificationcache" "go.uber.org/automaxprocs/maxprocs" "github.com/trufflesecurity/trufflehog/v3/pkg/analyzer" @@ -485,7 +485,7 @@ func run(state overseer.State) { logFatal(err, "failed to configure results flag") } - verificationCacheMetrics := verificationcaching.InMemoryMetrics{} + verificationCacheMetrics := verificationcache.InMemoryMetrics{} engConf := engine.Config{ Concurrency: *concurrency, diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index b220a533cfad..ab1598f63006 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -13,7 +13,7 @@ import ( "github.com/adrg/strutil" "github.com/adrg/strutil/metrics" lru "github.com/hashicorp/golang-lru/v2" - "github.com/trufflesecurity/trufflehog/v3/pkg/verificationcaching" + "github.com/trufflesecurity/trufflehog/v3/pkg/verificationcache" "google.golang.org/protobuf/proto" "github.com/trufflesecurity/trufflehog/v3/pkg/common" @@ -147,8 +147,8 @@ type Config struct { // VerificationOverlapWorkerMultiplier is used to determine the number of verification overlap workers to spawn. VerificationOverlapWorkerMultiplier int - VerificationResultCache verificationcaching.ResultCache - VerificationCacheMetrics verificationcaching.MetricsReporter + VerificationResultCache verificationcache.ResultCache + VerificationCacheMetrics verificationcache.MetricsReporter } // Engine represents the core scanning engine responsible for detecting secrets in input data. @@ -160,7 +160,7 @@ type Engine struct { concurrency int decoders []decoders.Decoder detectors []detectors.Detector - verificationCache verificationcaching.VerificationCache + verificationCache verificationcache.VerificationCache // Any detectors configured to override sources' verification flags detectorVerificationOverrides map[config.DetectorID]bool @@ -221,9 +221,7 @@ type Engine struct { // NewEngine creates a new Engine instance with the provided configuration. func NewEngine(ctx context.Context, cfg *Config) (*Engine, error) { - verificationCache := verificationcaching.New( - cfg.VerificationResultCache, - cfg.VerificationCacheMetrics) + verificationCache := verificationcache.New(cfg.VerificationResultCache, cfg.VerificationCacheMetrics) engine := &Engine{ concurrency: cfg.Concurrency, diff --git a/pkg/verificationcaching/in_memory_metrics.go b/pkg/verificationcache/in_memory_metrics.go similarity index 97% rename from pkg/verificationcaching/in_memory_metrics.go rename to pkg/verificationcache/in_memory_metrics.go index 0fe05affa980..7f3446094f6b 100644 --- a/pkg/verificationcaching/in_memory_metrics.go +++ b/pkg/verificationcache/in_memory_metrics.go @@ -1,4 +1,4 @@ -package verificationcaching +package verificationcache import ( "sync/atomic" diff --git a/pkg/verificationcaching/metrics_reporter.go b/pkg/verificationcache/metrics_reporter.go similarity index 98% rename from pkg/verificationcaching/metrics_reporter.go rename to pkg/verificationcache/metrics_reporter.go index fc7a2653e999..bd3a56c2beec 100644 --- a/pkg/verificationcaching/metrics_reporter.go +++ b/pkg/verificationcache/metrics_reporter.go @@ -1,4 +1,4 @@ -package verificationcaching +package verificationcache import "time" diff --git a/pkg/verificationcaching/result_cache.go b/pkg/verificationcache/result_cache.go similarity index 89% rename from pkg/verificationcaching/result_cache.go rename to pkg/verificationcache/result_cache.go index 15bc9f931071..1c00b02c90d0 100644 --- a/pkg/verificationcaching/result_cache.go +++ b/pkg/verificationcache/result_cache.go @@ -1,4 +1,4 @@ -package verificationcaching +package verificationcache import ( "github.com/trufflesecurity/trufflehog/v3/pkg/cache" diff --git a/pkg/verificationcaching/verification_cache.go b/pkg/verificationcache/verification_cache.go similarity index 99% rename from pkg/verificationcaching/verification_cache.go rename to pkg/verificationcache/verification_cache.go index fcadcf29d587..e3ede23e54d1 100644 --- a/pkg/verificationcaching/verification_cache.go +++ b/pkg/verificationcache/verification_cache.go @@ -1,4 +1,4 @@ -package verificationcaching +package verificationcache import ( "sync" diff --git a/pkg/verificationcaching/verification_cache_test.go b/pkg/verificationcache/verification_cache_test.go similarity index 99% rename from pkg/verificationcaching/verification_cache_test.go rename to pkg/verificationcache/verification_cache_test.go index cf7869be12e8..33ce4df58711 100644 --- a/pkg/verificationcaching/verification_cache_test.go +++ b/pkg/verificationcache/verification_cache_test.go @@ -1,4 +1,4 @@ -package verificationcaching +package verificationcache import ( "context" From ad44a8437d0cf8a6d9deaa70ddd95d7afe7137f1 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 14:00:00 -0800 Subject: [PATCH 36/43] add thread safety comment --- pkg/verificationcache/metrics_reporter.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/verificationcache/metrics_reporter.go b/pkg/verificationcache/metrics_reporter.go index bd3a56c2beec..82a336aec20d 100644 --- a/pkg/verificationcache/metrics_reporter.go +++ b/pkg/verificationcache/metrics_reporter.go @@ -3,6 +3,7 @@ package verificationcache import "time" // MetricsReporter is an interface used by a verification cache to report various metrics related to its operation. +// Implementations must be thread-safe. type MetricsReporter interface { // AddCredentialVerificationsSaved records "saved" verification attempts, which is when credential verification // status is loaded from the cache instead of retrieved from a remote verification endpoint. This number might not From e7d7a01f453517c60f1e57b63b6c6d253ffe4189 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 14:01:30 -0800 Subject: [PATCH 37/43] clarify comment --- pkg/verificationcache/metrics_reporter.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/verificationcache/metrics_reporter.go b/pkg/verificationcache/metrics_reporter.go index 82a336aec20d..c5f16865c3e9 100644 --- a/pkg/verificationcache/metrics_reporter.go +++ b/pkg/verificationcache/metrics_reporter.go @@ -6,11 +6,11 @@ import "time" // Implementations must be thread-safe. type MetricsReporter interface { // AddCredentialVerificationsSaved records "saved" verification attempts, which is when credential verification - // status is loaded from the cache instead of retrieved from a remote verification endpoint. This number might not - // match the number of cache hits due to hit "wasting"; see AddResultCacheHitsWasted for more information. + // status is loaded from the cache instead of retrieved from a remote verification endpoint. This number might be + // smaller than the cache hit count due to cache hit "wasting"; see AddResultCacheHitsWasted for more information. AddCredentialVerificationsSaved(count int) - // AddFromDataVerifyTimeSpent records wall time spent in calls to FromData with verify = true. + // AddFromDataVerifyTimeSpent records wall time spent in calls to detector.FromData with verify=true. AddFromDataVerifyTimeSpent(wallTime time.Duration) // AddResultCacheHits records result cache hits. Not all cache hits result in elided remote verification requests From f90530c9bc83a324a47530e4370ed8e8528f0ec1 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 15:10:02 -0800 Subject: [PATCH 38/43] test nil metrics --- pkg/verificationcache/verification_cache_test.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pkg/verificationcache/verification_cache_test.go b/pkg/verificationcache/verification_cache_test.go index 33ce4df58711..4ee3d9b20934 100644 --- a/pkg/verificationcache/verification_cache_test.go +++ b/pkg/verificationcache/verification_cache_test.go @@ -54,9 +54,8 @@ func TestVerificationCacheFromData_Passthrough(t *testing.T) { {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: true}, }} - metrics := InMemoryMetrics{} require.NotPanics(t, func() { - cache := New(nil, &metrics) + cache := New(nil, nil) results, err := cache.FromData( logContext.Background(), &detector, @@ -67,11 +66,6 @@ func TestVerificationCacheFromData_Passthrough(t *testing.T) { require.NoError(t, err) assert.Equal(t, 1, detector.fromDataCallCount) assert.ElementsMatch(t, detector.results, results) - assert.Less(t, int64(0), metrics.FromDataVerifyTimeSpentMS.Load()) - assert.Equal(t, int32(0), metrics.CredentialVerificationsSaved.Load()) - assert.Equal(t, int32(0), metrics.ResultCacheHits.Load()) - assert.Equal(t, int32(0), metrics.ResultCacheMisses.Load()) - assert.Equal(t, int32(0), metrics.ResultCacheHitsWasted.Load()) }) } From 52997ebbda5e5e5bdf6934c5f18d1e102c335cec Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 15:24:35 -0800 Subject: [PATCH 39/43] check for nil metrics and add more comments --- pkg/verificationcache/in_memory_metrics.go | 1 + pkg/verificationcache/result_cache.go | 2 +- pkg/verificationcache/verification_cache.go | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/pkg/verificationcache/in_memory_metrics.go b/pkg/verificationcache/in_memory_metrics.go index 7f3446094f6b..e5a79eb93890 100644 --- a/pkg/verificationcache/in_memory_metrics.go +++ b/pkg/verificationcache/in_memory_metrics.go @@ -5,6 +5,7 @@ import ( "time" ) +// InMemoryMetrics is a MetricsReporter that stores reported metrics in memory for retrieval at the end of a scan. type InMemoryMetrics struct { CredentialVerificationsSaved atomic.Int32 FromDataVerifyTimeSpentMS atomic.Int64 diff --git a/pkg/verificationcache/result_cache.go b/pkg/verificationcache/result_cache.go index 1c00b02c90d0..b915f65f314f 100644 --- a/pkg/verificationcache/result_cache.go +++ b/pkg/verificationcache/result_cache.go @@ -5,5 +5,5 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" ) -// ResultCache is a cache that holds individual detector results. +// ResultCache is a cache that holds individual detector results. It serves as a component of a VerificationCache. type ResultCache cache.Cache[detectors.Result] diff --git a/pkg/verificationcache/verification_cache.go b/pkg/verificationcache/verification_cache.go index e3ede23e54d1..283b9f70bb81 100644 --- a/pkg/verificationcache/verification_cache.go +++ b/pkg/verificationcache/verification_cache.go @@ -9,6 +9,8 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/hasher" ) +// VerificationCache is a structure that can be used to cache verification results from detectors so that a given +// credential does not trigger multiple identical remote verification attempts. type VerificationCache struct { metrics MetricsReporter resultCache ResultCache @@ -17,7 +19,14 @@ type VerificationCache struct { hasher hasher.Hasher } +// New creates a new verification cache with the provided result cache and metrics reporter. If resultCache is nil, the +// verification cache will be a no-op passthrough, although it will still record relevant metrics to the provided +// metrics reporter in this case. func New(resultCache ResultCache, metrics MetricsReporter) VerificationCache { + if metrics == nil { + metrics = &InMemoryMetrics{} + } + return VerificationCache{ metrics: metrics, resultCache: resultCache, @@ -25,6 +34,17 @@ func New(resultCache ResultCache, metrics MetricsReporter) VerificationCache { } } +// FromData is a cache-aware facade in front of the provided detector's FromData method. +// +// If the verification cache's underlying result cache is nil, or verify is false, or forceCacheUpdate is true, this +// method invokes the provided detector's FromData method with the provided arguments and returns the result. If the +// result cache is non-nil and forceCacheUpdate is true, the result cache is updated with the results before they are +// returned. +// +// Otherwise, the detector's FromData method is called with verify=false. The results cache is then checked for each +// returned result. If there is a cache hit for each result, these cached values are all returned. Otherwise, the +// detector's FromData method is called again, but with verify=true, and the results are stored in the cache and then +// returned. func (v *VerificationCache) FromData( ctx context.Context, detector detectors.Detector, From 6ba1339fc3b0553bedca20d3c84cd0dd97eed0cd Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 15:27:38 -0800 Subject: [PATCH 40/43] tweak comment --- pkg/verificationcache/verification_cache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/verificationcache/verification_cache.go b/pkg/verificationcache/verification_cache.go index 283b9f70bb81..1e27dc8236fa 100644 --- a/pkg/verificationcache/verification_cache.go +++ b/pkg/verificationcache/verification_cache.go @@ -41,7 +41,7 @@ func New(resultCache ResultCache, metrics MetricsReporter) VerificationCache { // result cache is non-nil and forceCacheUpdate is true, the result cache is updated with the results before they are // returned. // -// Otherwise, the detector's FromData method is called with verify=false. The results cache is then checked for each +// Otherwise, the detector's FromData method is called with verify=false. The result cache is then checked for each // returned result. If there is a cache hit for each result, these cached values are all returned. Otherwise, the // detector's FromData method is called again, but with verify=true, and the results are stored in the cache and then // returned. From c34f40a239494a80202c1c74e680eea6e5006c51 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 15:35:13 -0800 Subject: [PATCH 41/43] stop copying mutexes derp --- pkg/engine/engine.go | 2 +- pkg/verificationcache/verification_cache.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index ab1598f63006..0ee4080f49c2 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -160,7 +160,7 @@ type Engine struct { concurrency int decoders []decoders.Decoder detectors []detectors.Detector - verificationCache verificationcache.VerificationCache + verificationCache *verificationcache.VerificationCache // Any detectors configured to override sources' verification flags detectorVerificationOverrides map[config.DetectorID]bool diff --git a/pkg/verificationcache/verification_cache.go b/pkg/verificationcache/verification_cache.go index 1e27dc8236fa..8f2c9f609592 100644 --- a/pkg/verificationcache/verification_cache.go +++ b/pkg/verificationcache/verification_cache.go @@ -22,12 +22,12 @@ type VerificationCache struct { // New creates a new verification cache with the provided result cache and metrics reporter. If resultCache is nil, the // verification cache will be a no-op passthrough, although it will still record relevant metrics to the provided // metrics reporter in this case. -func New(resultCache ResultCache, metrics MetricsReporter) VerificationCache { +func New(resultCache ResultCache, metrics MetricsReporter) *VerificationCache { if metrics == nil { metrics = &InMemoryMetrics{} } - return VerificationCache{ + return &VerificationCache{ metrics: metrics, resultCache: resultCache, hasher: hasher.NewBlake2B(), From 548c4dcbb11b16e62b766784c7db6b3b8cb3b689 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 15:41:20 -0800 Subject: [PATCH 42/43] i hate computers --- pkg/verificationcache/verification_cache_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/verificationcache/verification_cache_test.go b/pkg/verificationcache/verification_cache_test.go index 4ee3d9b20934..0afda5631be7 100644 --- a/pkg/verificationcache/verification_cache_test.go +++ b/pkg/verificationcache/verification_cache_test.go @@ -135,8 +135,8 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateFalseAllCacheHits(t *testing.T cacheData[0].SetVerificationError(errors.New("test verification error")) metrics := InMemoryMetrics{} cache := New(simple.NewCache[detectors.Result](), &metrics) - cache.resultCache.Set(getResultCacheKey(t, &cache, remoteResults[0]), cacheData[0]) - cache.resultCache.Set(getResultCacheKey(t, &cache, remoteResults[1]), cacheData[1]) + cache.resultCache.Set(getResultCacheKey(t, cache, remoteResults[0]), cacheData[0]) + cache.resultCache.Set(getResultCacheKey(t, cache, remoteResults[1]), cacheData[1]) results, err := cache.FromData( logContext.Background(), @@ -172,7 +172,7 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateFalseCacheMiss(t *testing.T) { resultCache := simple.NewCache[detectors.Result]() metrics := InMemoryMetrics{} cache := New(resultCache, &metrics) - cache.resultCache.Set(getResultCacheKey(t, &cache, detector.results[0]), cachedResult) + cache.resultCache.Set(getResultCacheKey(t, cache, detector.results[0]), cachedResult) results, err := cache.FromData( logContext.Background(), @@ -205,8 +205,8 @@ func TestFromDataCached_VerifyTrueForceCacheUpdateTrue(t *testing.T) { detector.results[1].SetVerificationError(errors.New("test verification error")) metrics := InMemoryMetrics{} cache := New(simple.NewCache[detectors.Result](), &metrics) - cache.resultCache.Set(getResultCacheKey(t, &cache, detector.results[0]), detectors.Result{Redacted: "hello", Verified: false}) - cache.resultCache.Set(getResultCacheKey(t, &cache, detector.results[1]), detectors.Result{Redacted: "world", Verified: true}) + cache.resultCache.Set(getResultCacheKey(t, cache, detector.results[0]), detectors.Result{Redacted: "hello", Verified: false}) + cache.resultCache.Set(getResultCacheKey(t, cache, detector.results[1]), detectors.Result{Redacted: "world", Verified: true}) results, err := cache.FromData( logContext.Background(), From 4ad471a23d2c4173129fcf934a9c50e82c1bb9d0 Mon Sep 17 00:00:00 2001 From: Cody Rose Date: Thu, 19 Dec 2024 15:47:48 -0800 Subject: [PATCH 43/43] they're so bad --- pkg/verificationcache/verification_cache_test.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pkg/verificationcache/verification_cache_test.go b/pkg/verificationcache/verification_cache_test.go index 0afda5631be7..69476844d48f 100644 --- a/pkg/verificationcache/verification_cache_test.go +++ b/pkg/verificationcache/verification_cache_test.go @@ -89,7 +89,11 @@ func TestVerificationCacheFromData_VerifyFalseForceCacheUpdateFalse(t *testing.T {Redacted: "hello", Raw: []byte("hello"), RawV2: []byte("helloV2"), Verified: false}, }, results) assert.Empty(t, cache.resultCache.Values()) - assert.Equal(t, InMemoryMetrics{}, metrics) + assert.Equal(t, int64(0), metrics.FromDataVerifyTimeSpentMS.Load()) + assert.Equal(t, int32(0), metrics.CredentialVerificationsSaved.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheHits.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheHitsWasted.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheMisses.Load()) } func TestFromDataCached_VerifyFalseForceCacheUpdateTrue(t *testing.T) { @@ -118,7 +122,11 @@ func TestFromDataCached_VerifyFalseForceCacheUpdateTrue(t *testing.T) { {Redacted: "hello", Verified: false}, {Redacted: "world", Verified: false}, }, cache.resultCache.Values()) - assert.Equal(t, InMemoryMetrics{}, metrics) + assert.Equal(t, int64(0), metrics.FromDataVerifyTimeSpentMS.Load()) + assert.Equal(t, int32(0), metrics.CredentialVerificationsSaved.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheHits.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheHitsWasted.Load()) + assert.Equal(t, int32(0), metrics.ResultCacheMisses.Load()) } func TestFromDataCached_VerifyTrueForceCacheUpdateFalseAllCacheHits(t *testing.T) {