From 702f879db4106bf89408787286e24447791985ba Mon Sep 17 00:00:00 2001
From: Richard Gomez
Date: Fri, 1 Sep 2023 00:12:39 -0400
Subject: [PATCH] feat: optional detectors

Add an OptionalDetector interface so that a detector can decline to scan
a chunk. The engine consults it before running the detector.

The Parseur detector implements it to skip Node.js manifest and lock
files (package.json, package-lock.json, yarn.lock), which are a common
source of false positives. Chunks whose file path cannot be determined
are still scanned.
---
Review note: ShouldScanChunk previously returned the regex match itself,
which scanned only the ignored files; the result is now negated. The
post-image blob ids on the index lines below were not regenerated.

 pkg/detectors/detectors.go       |  8 ++++++++
 pkg/detectors/parseur/parseur.go | 36 ++++++++++++++++++++++++++++++++++++
 pkg/engine/engine.go             |  5 +++++
 3 files changed, 49 insertions(+)

diff --git a/pkg/detectors/detectors.go b/pkg/detectors/detectors.go
index 0a2356c0e791..13aa14b0d4f9 100644
--- a/pkg/detectors/detectors.go
+++ b/pkg/detectors/detectors.go
@@ -25,6 +25,14 @@ type Detector interface {
 	Type() detectorspb.DetectorType
 }
 
+// OptionalDetector is an optional interface that a detector can implement to
+// skip chunks it should not scan (for example, files known to produce
+// false positives).
+type OptionalDetector interface {
+	// ShouldScanChunk reports whether the detector should run against the chunk.
+	ShouldScanChunk(chunk sources.Chunk) bool
+}
+
 // Versioner is an optional interface that a detector can implement to
 // differentiate instances of the same detector type.
 type Versioner interface {
diff --git a/pkg/detectors/parseur/parseur.go b/pkg/detectors/parseur/parseur.go
index 4cad1da9dfcd..1007e26b41c4 100644
--- a/pkg/detectors/parseur/parseur.go
+++ b/pkg/detectors/parseur/parseur.go
@@ -3,6 +3,8 @@ package parseur
 import (
 	"context"
 	"fmt"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
 	"net/http"
 	"regexp"
 	"strings"
@@ -20,6 +22,10 @@ var _ detectors.Detector = (*Scanner)(nil)
 var (
 	client = common.SaneHttpClient()
 
+	// Node.js manifest and lock files are a common source of false-positives.
+	// https://github.com/trufflesecurity/trufflehog/issues/1460
+	ignoreFilesPat = regexp.MustCompile(`(^|/)(package(-lock)?\.json|yarn\.lock)$`)
+
 	// Make sure that your group is surrounded in boundary characters such as below to reduce false positives
 	keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"parseur"}) + `\b([a-f0-9]{40})\b`)
 )
@@ -30,6 +36,36 @@ func (s Scanner) Keywords() []string {
 	return []string{"parseur"}
 }
 
+// ShouldScanChunk implements detectors.OptionalDetector. It reports false for
+// chunks whose file path matches ignoreFilesPat and true for everything else,
+// including chunks whose file path cannot be determined.
+func (s Scanner) ShouldScanChunk(chunk sources.Chunk) bool {
+	// Generated protobuf getters are nil-safe, so missing metadata yields an
+	// empty path (which is scanned) instead of a nil-pointer panic.
+	m := chunk.SourceMetadata
+	var file string
+	switch chunk.SourceType {
+	case sourcespb.SourceType_SOURCE_TYPE_BITBUCKET:
+		file = m.GetBitbucket().GetFile()
+	case sourcespb.SourceType_SOURCE_TYPE_DOCKER:
+		file = m.GetDocker().GetFile()
+	case sourcespb.SourceType_SOURCE_TYPE_GITHUB:
+		file = m.GetGithub().GetFile()
+	case sourcespb.SourceType_SOURCE_TYPE_GITLAB:
+		file = m.GetGitlab().GetFile()
+	case sourcespb.SourceType_SOURCE_TYPE_FILESYSTEM:
+		file = m.GetFilesystem().GetFile()
+	case sourcespb.SourceType_SOURCE_TYPE_GIT:
+		file = m.GetGit().GetFile()
+	default:
+		// Source types whose file path is not resolved here (including
+		// SOURCE_TYPE_PUBLIC_GIT) are always scanned.
+		return true
+	}
+
+	return !ignoreFilesPat.MatchString(file)
+}
+
 // FromData will find and optionally verify Parseur secrets in a given set of bytes.
 func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
 	dataStr := string(data)
diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go
index 3489d980d375..0eeefd0722d9 100644
--- a/pkg/engine/engine.go
+++ b/pkg/engine/engine.go
@@ -464,6 +464,11 @@ func (e *Engine) detectorWorker(ctx context.Context) {
 
 		for verify, detectorsSet := range e.detectors {
 			for _, detector := range detectorsSet {
+				// Skip detectors that opt out of this chunk via OptionalDetector.
+				if d, ok := detector.(detectors.OptionalDetector); ok && !d.ShouldScanChunk(*chunk) {
+					continue
+				}
+
 				chunkContainsKeyword := false
 				for _, kw := range detector.Keywords() {
 					if _, ok := matchedKeywords[strings.ToLower(kw)]; ok {