Skip to content

Commit

Permalink
feat: optional detectors
Browse files Browse the repository at this point in the history
  • Loading branch information
rgmz authored and Richard Gomez committed Dec 31, 2023
1 parent 857a371 commit ae8b74e
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 0 deletions.
18 changes: 18 additions & 0 deletions pkg/detectors/detectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"crypto/rand"
"math/big"
"net/url"
"regexp"
"strings"
"unicode"

Expand All @@ -27,6 +28,23 @@ type Detector interface {
Type() detectorspb.DetectorType
}

// ConditionalDetector is an optional interface that a detector can implement to
// skip chunks based on specific criteria.
//
// The engine discovers it with a type assertion on a Detector; a detector that
// does not implement it is never filtered by this mechanism.
type ConditionalDetector interface {
// ScanChunk determines whether the detector should run.
// It returns true to scan the chunk and false to skip the detector for it.
ScanChunk(chunk sources.Chunk) bool
}

// FilenameConditions is a set of common conditions to be used by ConditionalDetector.
// (Using anonymous structs is weird, but Go has no concept of static members... https://stackoverflow.com/a/55390104)
var FilenameConditions = struct {
// LockFiles matches slash-separated paths whose final element is exactly
// package.json, package-lock.json, or yarn.lock (case-sensitive), either as
// the whole path or directly after a "/".
// Such files are a common source of false-positives.
// https://github.com/trufflesecurity/trufflehog/issues/1460
//
// NOTE(review): package.json is a manifest rather than a lock file — confirm
// that matching it under this name is intentional.
LockFiles *regexp.Regexp
}{
// Compiled once at package initialization; MustCompile panics if the pattern is invalid.
LockFiles: regexp.MustCompile(`(^|/)(package(-lock)?\.json|yarn\.lock)$`),
}

// Versioner is an optional interface that a detector can implement to
// differentiate instances of the same detector type.
type Versioner interface {
Expand Down
11 changes: 11 additions & 0 deletions pkg/detectors/parseur/parseur.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"regexp"
"strings"

"github.com/trufflesecurity/trufflehog/v3/pkg/sources"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
Expand All @@ -16,6 +18,7 @@ type Scanner struct{}

// Compile-time assertions: the build fails if Scanner stops implementing
// either the mandatory Detector interface or the optional ConditionalDetector.
var (
	_ detectors.Detector            = (*Scanner)(nil)
	_ detectors.ConditionalDetector = (*Scanner)(nil)
)

var (
client = common.SaneHttpClient()
Expand All @@ -30,6 +33,14 @@ func (s Scanner) Keywords() []string {
return []string{"parseur"}
}

// ScanChunk reports whether this detector should run against chunk.
//
// It returns false (skip) when the chunk's source metadata exposes a file name
// that matches detectors.FilenameConditions.LockFiles, because such files are a
// common source of false positives. It returns true (scan) for every other
// chunk, including chunks whose metadata does not implement
// sources.GitSourceMetadata and therefore carries no file name to inspect.
func (s Scanner) ScanChunk(chunk sources.Chunk) bool {
	// TODO: Can |chunk.SourceMetadata| be nil?
	// NOTE(review): generated protobuf getters are normally safe to call on a
	// nil receiver, in which case the assertion below simply fails — confirm.
	// NOTE(review): confirm that the value returned by GetData() itself (and
	// not a message nested inside it) implements GitSourceMetadata; if the
	// assertion can never succeed, this filter is a no-op.
	m, ok := chunk.SourceMetadata.GetData().(sources.GitSourceMetadata)
	if !ok {
		// No file name is available, so do not suppress the detector.
		return true
	}

	// A match means the file is one we want to skip, so the result is negated:
	// true means "scan", false means "skip".
	return !detectors.FilenameConditions.LockFiles.MatchString(m.GetFile())
}

// FromData will find and optionally verify Parseur secrets in a given set of bytes.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)
Expand Down
7 changes: 7 additions & 0 deletions pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,13 @@ func (e *Engine) detectorWorker(ctx context.Context) {
e.ahoCorasickCore.PopulateMatchingDetectors(string(decoded.Chunk.Data), chunkSpecificDetectors)

for k, detector := range chunkSpecificDetectors {
d, ok := detector.(detectors.ConditionalDetector)
if ok && !d.ScanChunk(*chunk) {
ctx.Logger().V(4).Info("skipping detector for chunk", "detector", detector.Type().String(), "chunk", chunk)
delete(chunkSpecificDetectors, k)
continue
}

decoded.Chunk.Verify = e.verify
wgDetect.Add(1)
e.detectableChunksChan <- detectableChunk{
Expand Down
8 changes: 8 additions & 0 deletions pkg/sources/sources.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ type Chunk struct {
Verify bool
}

// GitSourceMetadata defines a common interface for Git-based source metadata.
// For example, this should match Git, Azure, Bitbucket, GitHub, and GitLab.
//
// NOTE(review): the Get-prefixed method names presumably mirror the generated
// protobuf getters of those metadata types — confirm against the pb package.
type GitSourceMetadata interface {
// GetRepository returns the repository identifier as a string.
GetRepository() string
// GetCommit returns the commit identifier as a string.
GetCommit() string
// GetFile returns the file name or path as a string; detectors can match it
// against filename conditions to decide whether to scan a chunk.
GetFile() string
}

// ChunkingTarget specifies criteria for a targeted chunking process.
// Instead of collecting data indiscriminately, this struct allows the caller
// to specify particular subsets of data they're interested in. This becomes
Expand Down

0 comments on commit ae8b74e

Please sign in to comment.