diff --git a/pkg/detectors/detectors.go b/pkg/detectors/detectors.go index 9b3a4ffed841..c88b2d6b8574 100644 --- a/pkg/detectors/detectors.go +++ b/pkg/detectors/detectors.go @@ -6,6 +6,7 @@ import ( "errors" "math/big" "net/url" + "regexp" "strings" "unicode" @@ -44,6 +45,27 @@ type CustomResultsCleaner interface { ShouldCleanResultsIrrespectiveOfConfiguration() bool } +// ConditionalDetector is an optional interface that a detector can implement to +// skip chunks based on specific criteria. +type ConditionalDetector interface { + // ShouldScanChunk determines whether the detector should run. + ShouldScanChunk(chunk sources.Chunk) bool +} + +var lockFilePat = regexp.MustCompile(`(^|/)(package(-lock)?\.json|yarn\.lock)$`) + +// Conditions is a set of common conditions to be used by ConditionalDetector. +// (Using anonymous structs is weird, but Go has no concept of static members... https://stackoverflow.com/a/55390104) +var Conditions = struct { + // LockFiles are a common source of false-positives. + // https://github.com/trufflesecurity/trufflehog/issues/1460 + IsLockFile func(path string) bool +}{ + IsLockFile: func(path string) bool { + return lockFilePat.MatchString(path) + }, +} + // Versioner is an optional interface that a detector can implement to // differentiate instances of the same detector type. type Versioner interface { diff --git a/pkg/detectors/parseur/parseur.go b/pkg/detectors/parseur/parseur.go index 44f647b6fb60..a87115437278 100644 --- a/pkg/detectors/parseur/parseur.go +++ b/pkg/detectors/parseur/parseur.go @@ -7,6 +7,8 @@ import ( "net/http" "strings" + "github.com/trufflesecurity/trufflehog/v3/pkg/sources" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" @@ -18,6 +20,7 @@ type Scanner struct { // Ensure the Scanner satisfies the interface at compile time var _ detectors.Detector = (*Scanner)(nil) +var _ detectors.ConditionalDetector = (*Scanner)(nil) var ( defaultClient = common.SaneHttpClient() @@ -32,6 +35,13 @@ func (s Scanner) Keywords() []string { return []string{"parseur"} } +func (s Scanner) ShouldScanChunk(chunk sources.Chunk) bool { + if m, ok := sources.NewGitSourceMetadata(chunk.SourceType, chunk.SourceMetadata); ok { + return !detectors.Conditions.IsLockFile(m.File) + } + return true +} + // FromData will find and optionally verify Parseur secrets in a given set of bytes. func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 0ee4080f49c2..0781c60e194e 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -802,6 +802,11 @@ func (e *Engine) scannerWorker(ctx context.Context) { } for _, detector := range matchingDetectors { + if d, ok := detector.Detector.(detectors.ConditionalDetector); ok && !d.ShouldScanChunk(*chunk) { + ctx.Logger().V(4).Info("skipping detector for chunk", "detector", detector.Type().String(), "chunk", chunk) + continue + } + decoded.Chunk.Verify = e.shouldVerifyChunk(sourceVerify, detector, e.detectorVerificationOverrides) wgDetect.Add(1) e.detectableChunksChan <- detectableChunk{ diff --git a/pkg/sources/sources.go b/pkg/sources/sources.go index ad53ff698d0b..189b4a1da813 100644 --- a/pkg/sources/sources.go +++ b/pkg/sources/sources.go @@ -45,6 +45,42 @@ type Chunk struct { Verify bool } +// GitSourceMetadata defines a common struct for Git-based source metadata. +type GitSourceMetadata struct { + Repository string + Commit string + File string +} + +func NewGitSourceMetadata(source sourcespb.SourceType, data *source_metadatapb.MetaData) (*GitSourceMetadata, bool) { + if data == nil { + return nil, false + } + + switch source { + case sourcespb.SourceType_SOURCE_TYPE_GIT: + md := data.GetGit() + return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true + case sourcespb.SourceType_SOURCE_TYPE_AZURE_REPOS: + md := data.GetAzureRepos() + return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true + case sourcespb.SourceType_SOURCE_TYPE_BITBUCKET: + md := data.GetBitbucket() + return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true + case sourcespb.SourceType_SOURCE_TYPE_GERRIT: + md := data.GetGerrit() + return &GitSourceMetadata{md.GetProject(), md.GetCommit(), md.GetFile()}, true + case sourcespb.SourceType_SOURCE_TYPE_GITHUB: + md := data.GetGithub() + return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true + case sourcespb.SourceType_SOURCE_TYPE_GITLAB: + md := data.GetGitlab() + return &GitSourceMetadata{md.GetRepository(), md.GetCommit(), md.GetFile()}, true + default: + return nil, false + } +} + // ChunkingTarget specifies criteria for a targeted chunking process. // Instead of collecting data indiscriminately, this struct allows the caller // to specify particular subsets of data they're interested in. This becomes