Skip to content

Commit

Permalink
feat(detectors): add conditional interface
Browse files Browse the repository at this point in the history
  • Loading branch information
rgmz committed Dec 31, 2024
1 parent dde8f8a commit 380fea0
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 0 deletions.
22 changes: 22 additions & 0 deletions pkg/detectors/detectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"errors"
"math/big"
"net/url"
"regexp"
"strings"
"unicode"

Expand Down Expand Up @@ -44,6 +45,27 @@ type CustomResultsCleaner interface {
ShouldCleanResultsIrrespectiveOfConfiguration() bool
}

// ConditionalDetector is an optional interface that a detector can implement to
// opt out of scanning individual chunks based on detector-specific criteria
// (for example, the path of the file the chunk came from).
type ConditionalDetector interface {
// ShouldScanChunk reports whether the detector should run against chunk.
// The engine's scanner worker skips the detector for that chunk when this
// returns false.
ShouldScanChunk(chunk sources.Chunk) bool
}

// lockFilePat matches paths whose final element is package.json,
// package-lock.json, or yarn.lock, either at the root or inside any directory.
var lockFilePat = regexp.MustCompile(`(^|/)(package(-lock)?\.json|yarn\.lock)$`)

// Conditions groups reusable predicates for ConditionalDetector implementations.
// An anonymous struct is used as a namespace so callers can write
// detectors.Conditions.IsLockFile(...); see https://stackoverflow.com/a/55390104.
var Conditions = struct {
	// IsLockFile reports whether path names a dependency manifest or lock file.
	// Such files are a common source of false positives:
	// https://github.com/trufflesecurity/trufflehog/issues/1460
	IsLockFile func(path string) bool
}{
	IsLockFile: lockFilePat.MatchString,
}

// Versioner is an optional interface that a detector can implement to
// differentiate instances of the same detector type.
type Versioner interface {
Expand Down
10 changes: 10 additions & 0 deletions pkg/detectors/parseur/parseur.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"net/http"
"strings"

"github.com/trufflesecurity/trufflehog/v3/pkg/sources"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand All @@ -18,6 +20,7 @@ type Scanner struct {

// Compile-time checks that Scanner implements the detector interfaces it is
// expected to satisfy.
var (
	_ detectors.Detector            = (*Scanner)(nil)
	_ detectors.ConditionalDetector = (*Scanner)(nil)
)

var (
defaultClient = common.SaneHttpClient()
Expand All @@ -32,6 +35,13 @@ func (s Scanner) Keywords() []string {
return []string{"parseur"}
}

// ShouldScanChunk reports whether this detector should run against chunk.
// Chunks for which no Git-style metadata can be derived are always scanned;
// otherwise the chunk is skipped when its file path is a lock file.
func (s Scanner) ShouldScanChunk(chunk sources.Chunk) bool {
	gitMeta, isGit := sources.NewGitSourceMetadata(chunk.SourceType, chunk.SourceMetadata)
	if !isGit {
		return true
	}
	return !detectors.Conditions.IsLockFile(gitMeta.File)
}

// FromData will find and optionally verify Parseur secrets in a given set of bytes.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)
Expand Down
5 changes: 5 additions & 0 deletions pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,11 @@ func (e *Engine) scannerWorker(ctx context.Context) {
}

for _, detector := range matchingDetectors {
if d, ok := detector.Detector.(detectors.ConditionalDetector); ok && !d.ShouldScanChunk(*chunk) {
ctx.Logger().V(4).Info("skipping detector for chunk", "detector", detector.Type().String(), "chunk", chunk)
continue
}

decoded.Chunk.Verify = e.shouldVerifyChunk(sourceVerify, detector, e.detectorVerificationOverrides)
wgDetect.Add(1)
e.detectableChunksChan <- detectableChunk{
Expand Down
36 changes: 36 additions & 0 deletions pkg/sources/sources.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,42 @@ type Chunk struct {
Verify bool
}

// GitSourceMetadata is a source-agnostic view of the metadata shared by the
// Git-based sources (Git, Azure Repos, Bitbucket, Gerrit, GitHub, GitLab).
//
// Field order is significant: the struct is built with positional literals.
type GitSourceMetadata struct {
	// Repository identifies the repository (for Gerrit, the project).
	Repository string
	// Commit is the commit reported by the source's metadata.
	Commit string
	// File is the file path reported by the source's metadata.
	File string
}

// NewGitSourceMetadata extracts the repository, commit, and file from data for
// the Git-based source types (Git, Azure Repos, Bitbucket, Gerrit, GitHub,
// GitLab).
//
// It returns (nil, false) when data is nil, when source is not one of the
// supported Git-based types, or when data does not actually carry the metadata
// variant that corresponds to source. The last check matters because the
// generated getters are nil-safe: without it, a mismatched variant would yield
// a struct of empty strings reported as a successful conversion.
func NewGitSourceMetadata(source sourcespb.SourceType, data *source_metadatapb.MetaData) (*GitSourceMetadata, bool) {
	if data == nil {
		return nil, false
	}

	switch source {
	case sourcespb.SourceType_SOURCE_TYPE_GIT:
		if md := data.GetGit(); md != nil {
			return &GitSourceMetadata{Repository: md.GetRepository(), Commit: md.GetCommit(), File: md.GetFile()}, true
		}
	case sourcespb.SourceType_SOURCE_TYPE_AZURE_REPOS:
		if md := data.GetAzureRepos(); md != nil {
			return &GitSourceMetadata{Repository: md.GetRepository(), Commit: md.GetCommit(), File: md.GetFile()}, true
		}
	case sourcespb.SourceType_SOURCE_TYPE_BITBUCKET:
		if md := data.GetBitbucket(); md != nil {
			return &GitSourceMetadata{Repository: md.GetRepository(), Commit: md.GetCommit(), File: md.GetFile()}, true
		}
	case sourcespb.SourceType_SOURCE_TYPE_GERRIT:
		// Gerrit names its repositories "projects".
		if md := data.GetGerrit(); md != nil {
			return &GitSourceMetadata{Repository: md.GetProject(), Commit: md.GetCommit(), File: md.GetFile()}, true
		}
	case sourcespb.SourceType_SOURCE_TYPE_GITHUB:
		if md := data.GetGithub(); md != nil {
			return &GitSourceMetadata{Repository: md.GetRepository(), Commit: md.GetCommit(), File: md.GetFile()}, true
		}
	case sourcespb.SourceType_SOURCE_TYPE_GITLAB:
		if md := data.GetGitlab(); md != nil {
			return &GitSourceMetadata{Repository: md.GetRepository(), Commit: md.GetCommit(), File: md.GetFile()}, true
		}
	}

	// Unsupported source type, or the metadata variant did not match it.
	return nil, false
}

// ChunkingTarget specifies criteria for a targeted chunking process.
// Instead of collecting data indiscriminately, this struct allows the caller
// to specify particular subsets of data they're interested in. This becomes
Expand Down

0 comments on commit 380fea0

Please sign in to comment.