diff --git a/pkg/detectors/tailscale/tailscale_integration_test.go b/pkg/detectors/tailscale/tailscale_integration_test.go new file mode 100644 index 000000000000..28ee3ba5d8ad --- /dev/null +++ b/pkg/detectors/tailscale/tailscale_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package tailscale + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTailscaleapi_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors4") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TAILSCALEAPI") + inactiveSecret := testSecrets.MustGetField("TAILSCALEAPI_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tailscaleapi secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tailscale, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tailscaleapi secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tailscale, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Tailscaleapi.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Tailscaleapi.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tailscale/tailscale_test.go b/pkg/detectors/tailscale/tailscale_test.go index 28ee3ba5d8ad..fccf1f89aed7 100644 --- a/pkg/detectors/tailscale/tailscale_test.go +++ b/pkg/detectors/tailscale/tailscale_test.go @@ -1,120 +1,81 @@ -//go:build detectors -// +build detectors - package tailscale import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTailscaleapi_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors4") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TAILSCALEAPI") - inactiveSecret := testSecrets.MustGetField("TAILSCALEAPI_INACTIVE") +var ( + validPattern = "tskey-rtzgayeq-RLL0xBAIXBhkYRhir0gmxHNoARkj0CqpNTj_xM2Zpm4lDxEIGsYbwO_kzlNSwrQrAOL4yacZGlBfj37e3WRRlmYfKtrgC-xmk0NNQFLQGCTPwcwQT7d6YipCXS1ScCVL8So" + invalidPattern = "tskey-rtzgayeq-RLL0xBAIXBhkYRhir0gmxH?oARkj0CqpNTj_xM2Zpm4lDxEIGsYbwO_kzlNSwrQrAOL4yacZGlBfj37e3WRRlmYfKtrgC-xmk0NNQFLQGCTPwcwQT7d6YipCXS1ScCVL8So" + keyword = "tailscale" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTailscaleapi_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tailscaleapi secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tailscale, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tailscale", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tailscaleapi secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tailscale, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Tailscaleapi.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Tailscaleapi.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/tallyfy/tallyfy_integration_test.go b/pkg/detectors/tallyfy/tallyfy_integration_test.go new file mode 100644 index 000000000000..82cdc35d020d --- /dev/null +++ b/pkg/detectors/tallyfy/tallyfy_integration_test.go @@ -0,0 +1,117 @@ +//go:build detectors +// +build detectors + +package tallyfy + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTallyfy_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TALLYFY") + inactiveSecret := testSecrets.MustGetField("TALLYFY_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tallyfy secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tallyfy, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tallyfy secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tallyfy, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Tallyfy.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Tallyfy.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + s.FromData(ctx, false, data) + } + }) + } +} diff --git a/pkg/detectors/tallyfy/tallyfy_test.go b/pkg/detectors/tallyfy/tallyfy_test.go index 82cdc35d020d..3065da9edb25 100644 --- a/pkg/detectors/tallyfy/tallyfy_test.go +++ b/pkg/detectors/tallyfy/tallyfy_test.go @@ -1,116 +1,90 @@ -//go:build detectors -// +build detectors - package tallyfy import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTallyfy_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TALLYFY") - inactiveSecret := testSecrets.MustGetField("TALLYFY_INACTIVE") +var ( + validPattern = "K6azzL9elP7qKOQIyU2PF68Ej1WiZWCIQBUY.nYqWoaP99FfEwzxQOuMLbkFvyu4i4AM74Dk55iokNXhD2YeyFuwze060X8NNXN1WqiVgavVLTuL9S45Q8hpa5yB9rVREeWrR3YhNclTRKL1vnW1f4BJZIMIOIPNFtN70ZXwIZEdDXzvZZtBLFA36R1WlKKKKd1NV7Rnc1RLCk9trhYwFsngcRFVPvqwLbPIv3hsfia31IDFDHXAdHuWXyDb0dA0vvOQqxmYn0kUEX13rEdFzBnZ7rJL861B9vCYkbBJSgujp.EV15KF8CXF3GmQ5NgXo_8KMkmLc55Z6v3JBVstk381Bpr53q3tqISdfjj3zNj1bRFk6KXNrjUzLMkZjVBbpme5-dAkJFcs5r1jmHXkrRmE0xLk25TUX8TukVdzCjWxYogQJopcXHfw_uqpF8JJ1FomLhpRu8Ag0RrRaO-IfF0Nz-SbQcIOtEJrvbfzVF3LVDPU5ID8bi3cz3qVuS30TxR-0fLaQHnRq0POGYlsy-OEZI078Wj0g3diAr8Yy6SGOkT-CB0p175aYkhUmNGhWpsGMyjASr-MXT9i2HPUPoI50X-Xw6HsQrqydLqYQg-uA2FDTpqza3IZMcyYj-4AeFWgzj3fSgyqM-IK47Cf52hzrW4YPmI9Lz-ooGPbd1ZAJXVLDrNtsG9U4VetIxPKpz4RgeVnGwy2NW53U1L7sMixwewhBvL8WL-T5FEyLt8-q-6h_ubaOxhVKjEzwIkunAzLYfCLYCj0MaPvTfn2mSbJ5r_5Mc6Mx-skqUt_yJhKsyYoXQOFni0awoPIf_xOEpvjmUELr2ZyPB-EqiFXB9zRC4oYO2QULWQYocd0cCKHsRL_ulfZ9qI1XR4Wl04DumNuJHdXodiSWXMwUOgsNf-vMs-2IHoVahWkihTSV6FyDNAitSLSEGNK5x_egOOY5tJWyvZpU8s80Rw7lF6Y-8C5f" + invalidPattern = "K?azzL9elP7qKOQIyU2PF68Ej1WiZWCIQBUY.nYqWoaP99FfEwzxQOuMLbkFvyu4i4AM74Dk55iokNXhD2YeyFuwze060X8NNXN1WqiVgavVLTuL9S45Q8hpa5yB9rVREeWrR3YhNclTRKL1vnW1f4BJZIMIOIPNFtN70ZXwIZEdDXzvZZtBLFA36R1WlKKKKd1NV7Rnc1RLCk9trhYwFsngcRFVPvqwLbPIv3hsfia31IDFDHXAdHuWXyDb0dA0vvOQqxmYn0kUEX13rEdFzBnZ7rJL861B9vCYkbBJSgujp.EV15KF8CXF3GmQ5NgXo_8KMkmLc55Z6v3JBVstk381Bpr53q3tqISdfjj3zNj1bRFk6KXNrjUzLMkZjVBbpme5-dAkJFcs5r1jmHXkrRmE0xLk25TUX8TukVdzCjWxYogQJopcXHfw_uqpF8JJ1FomLhpRu8Ag0RrRaO-IfF0Nz-SbQcIOtEJrvbfzVF3LVDPU5ID8bi3cz3qVuS30TxR-0fLaQHnRq0POGYlsy-OEZI078Wj0g3diAr8Yy6SGOkT-CB0p175aYkhUmNGhWpsGMyjASr-MXT9i2HPUPoI50X-Xw6HsQrqydLqYQg-uA2FDTpqza3IZMcyYj-4AeFWgzj3fSgyqM-IK47Cf52hzrW4YPmI9Lz-ooGPbd1ZAJXVLDrNtsG9U4VetIxPKpz4RgeVnGwy2NW53U1L7sMixwewhBvL8WL-T5FEyLt8-q-6h_ubaOxhVKjEzwIkunAzLYfCLYCj0MaPvTfn2mSbJ5r_5Mc6Mx-skqUt_yJhKsyYoXQOFni0awoPIf_xOEpvjmUELr2ZyPB-EqiFXB9zRC4oYO2QULWQYocd0cCKHsRL_ulfZ9qI1XR4Wl04DumNuJHdXodiSWXMwUOgsNf-vMs-2IHoVahWkihTSV6FyDNAitSLSEGNK5x_egOOY5tJWyvZpU8s80Rw7lF6Y-8C5f" + keyword = "tallyfy" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTallyfy_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tallyfy secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tallyfy, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tallyfy", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tallyfy secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tallyfy, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Tallyfy.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) } - got[i].Raw = nil + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Tallyfy.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} + } + } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - s.FromData(ctx, false, data) + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } diff --git a/pkg/detectors/tatumio/tatumio_integration_test.go b/pkg/detectors/tatumio/tatumio_integration_test.go new file mode 100644 index 000000000000..885ed5268f9b --- /dev/null +++ b/pkg/detectors/tatumio/tatumio_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package tatumio + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTatumIO_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TATUMIO") + inactiveSecret := testSecrets.MustGetField("TATUMIO_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tatumio secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TatumIO, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tatumio secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TatumIO, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TatumIO.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TatumIO.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tatumio/tatumio_test.go b/pkg/detectors/tatumio/tatumio_test.go index 885ed5268f9b..926c8fe4663b 100644 --- a/pkg/detectors/tatumio/tatumio_test.go +++ b/pkg/detectors/tatumio/tatumio_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package tatumio import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTatumIO_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TATUMIO") - inactiveSecret := testSecrets.MustGetField("TATUMIO_INACTIVE") +var ( + validPattern = "xu4iyzk6fa6ld8nz9ayms4yq3wbh48qmiu8k" + invalidPattern = "xu4i?zk6fa6ld8nz9ayms4yq3wbh48qmiu8k" + keyword = "tatumio" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTatumIO_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tatumio secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TatumIO, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tatumio", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tatumio secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TatumIO, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TatumIO.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TatumIO.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/taxjar/taxjar_integration_test.go b/pkg/detectors/taxjar/taxjar_integration_test.go new file mode 100644 index 000000000000..1eb5ea716137 --- /dev/null +++ b/pkg/detectors/taxjar/taxjar_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package taxjar + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTaxjar_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TAXJAR_TOKEN") + inactiveSecret := testSecrets.MustGetField("TAXJAR_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a taxjar secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Taxjar, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a taxjar secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Taxjar, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Taxjar.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Taxjar.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/taxjar/taxjar_test.go b/pkg/detectors/taxjar/taxjar_test.go index 1eb5ea716137..efd34e121562 100644 --- a/pkg/detectors/taxjar/taxjar_test.go +++ b/pkg/detectors/taxjar/taxjar_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package taxjar import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTaxjar_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TAXJAR_TOKEN") - inactiveSecret := testSecrets.MustGetField("TAXJAR_INACTIVE") +var ( + validPattern = "2jnsyu2ebcnaemws9b2rmh6w6e12i8j7" + invalidPattern = "2jnsyu2ebcnaemws?b2rmh6w6e12i8j7" + keyword = "taxjar" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTaxjar_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a taxjar secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Taxjar, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword taxjar", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a taxjar secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Taxjar, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Taxjar.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Taxjar.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/teamgate/teamgate_integration_test.go b/pkg/detectors/teamgate/teamgate_integration_test.go new file mode 100644 index 000000000000..8ff2b5e7d61e --- /dev/null +++ b/pkg/detectors/teamgate/teamgate_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package teamgate + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTeamgate_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TEAMGATE") + key := testSecrets.MustGetField("TEAMGATE_KEY") + inactiveSecret := testSecrets.MustGetField("TEAMGATE_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a teamgate secret %s within teamgate %s", secret, key)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Teamgate, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a teamgate secret %s within teamgate %s but not valid", inactiveSecret, key)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Teamgate, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Teamgate.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Teamgate.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/teamgate/teamgate_test.go b/pkg/detectors/teamgate/teamgate_test.go index 8ff2b5e7d61e..a46231cefca9 100644 --- a/pkg/detectors/teamgate/teamgate_test.go +++ b/pkg/detectors/teamgate/teamgate_test.go @@ -1,121 +1,83 @@ -//go:build detectors -// +build detectors - package teamgate import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTeamgate_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TEAMGATE") - key := testSecrets.MustGetField("TEAMGATE_KEY") - inactiveSecret := testSecrets.MustGetField("TEAMGATE_INACTIVE") +var ( + validToken = "78fohntospcdns4n7zokz7zr134vn7ua7io7ehp1" + invalidToken = "78fohntospcdns4n7?okz7zr134vn7ua7io7ehp1" + validKey = "AdPZhlbr8bIaIUomTizYvauT2HMNUfm6oK4Aft8JICXKvdKbHOEeRVLPycmGLi60QBksu5tPvD8X4ciX" + invalidKey = "AdPZhlbr8b?aIUomTizYvauT2HMNUfm6oK4Aft8JICXKvdKbHOEeRVLPycmGLi60QBksu5tPvD8X4ciX" + keyword = "teamgate" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTeamgate_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a teamgate secret %s within teamgate %s", secret, key)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Teamgate, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword teamgate", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, validToken, keyword, validKey), + want: []string{validToken}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a teamgate secret %s within teamgate %s but not valid", inactiveSecret, key)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Teamgate, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, invalidToken, keyword, invalidKey), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Teamgate.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Teamgate.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/teamworkcrm/teamworkcrm_integration_test.go b/pkg/detectors/teamworkcrm/teamworkcrm_integration_test.go new file mode 100644 index 000000000000..1c55c09d9324 --- /dev/null +++ b/pkg/detectors/teamworkcrm/teamworkcrm_integration_test.go @@ -0,0 +1,117 @@ +//go:build detectors +// +build detectors + +package teamworkcrm + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTeamworkCRM_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TEAMWORKCRM") + inactiveSecret := testSecrets.MustGetField("TEAMWORKCRM_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a teamworkcrm secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TeamworkCRM, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a teamworkcrm secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TeamworkCRM, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TeamworkCRM.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TeamworkCRM.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + s.FromData(ctx, false, data) + } + }) + } +} diff --git a/pkg/detectors/teamworkcrm/teamworkcrm_test.go b/pkg/detectors/teamworkcrm/teamworkcrm_test.go index 1c55c09d9324..a0c3db8f547c 100644 --- a/pkg/detectors/teamworkcrm/teamworkcrm_test.go +++ b/pkg/detectors/teamworkcrm/teamworkcrm_test.go @@ -1,116 +1,90 @@ -//go:build detectors -// +build detectors - package teamworkcrm import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTeamworkCRM_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TEAMWORKCRM") - inactiveSecret := testSecrets.MustGetField("TEAMWORKCRM_INACTIVE") +var ( + validPattern = "tkn.v1_LggER0OG5HCo5DCy1FAHXVJiNKat8RbSygBGt0a5dbZlMN1hwlgSXx2ZzfFg4Ax98BbIIlH=" + invalidPattern = "tkn.v1_L?gER0OG5HCo5DCy1FAHXVJiNKat8RbSygBGt0a5dbZlMN1hwlgSXx2ZzfFg4Ax98BbIIlH=" + keyword = "teamworkcrm" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTeamworkCRM_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a teamworkcrm secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TeamworkCRM, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword teamworkcrm", + input: fmt.Sprintf("%s token = '%s '", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a teamworkcrm secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TeamworkCRM, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s ' | '%s '", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s '", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TeamworkCRM.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) } - got[i].Raw = nil + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TeamworkCRM.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} + } + } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - s.FromData(ctx, false, data) + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } diff --git a/pkg/detectors/teamworkdesk/teamworkdesk_integration_test.go b/pkg/detectors/teamworkdesk/teamworkdesk_integration_test.go new file mode 100644 index 000000000000..bf03fed6fa42 --- /dev/null +++ b/pkg/detectors/teamworkdesk/teamworkdesk_integration_test.go @@ -0,0 +1,117 @@ +//go:build detectors +// +build detectors + +package teamworkdesk + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTeamworkDesk_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TEAMWORKDESK") + inactiveSecret := testSecrets.MustGetField("TEAMWORKDESK_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a teamworkdesk secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TeamworkDesk, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a teamworkdesk secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TeamworkDesk, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TeamworkDesk.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TeamworkDesk.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + s.FromData(ctx, false, data) + } + }) + } +} diff --git a/pkg/detectors/teamworkdesk/teamworkdesk_test.go b/pkg/detectors/teamworkdesk/teamworkdesk_test.go index bf03fed6fa42..1c49df320f22 100644 --- a/pkg/detectors/teamworkdesk/teamworkdesk_test.go +++ b/pkg/detectors/teamworkdesk/teamworkdesk_test.go @@ -1,116 +1,90 @@ -//go:build detectors -// +build detectors - package teamworkdesk import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTeamworkDesk_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TEAMWORKDESK") - inactiveSecret := testSecrets.MustGetField("TEAMWORKDESK_INACTIVE") +var ( + validPattern = "tkn.v1_PQD5xBDqpPQHJjX1CFtPaM1xgf0vyrmVkpUtN0aMpkBjBnG8Ifg3ohgpd4w1gG1Otaf0O1v=" + invalidPattern = "tkn.v1_PQD5xBDq?PQHJjX1CFtPaM1xgf0vyrmVkpUtN0aMpkBjBnG8Ifg3ohgpd4w1gG1Otaf0O1v=" + keyword = "teamworkdesk" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTeamworkDesk_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a teamworkdesk secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TeamworkDesk, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword teamworkdesk", + input: fmt.Sprintf("%s token = '%s '", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a teamworkdesk secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TeamworkDesk, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s ' | '%s '", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s '", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TeamworkDesk.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) } - got[i].Raw = nil + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TeamworkDesk.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} + } + } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - s.FromData(ctx, false, data) + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } diff --git a/pkg/detectors/teamworkspaces/teamworkspaces_integration_test.go b/pkg/detectors/teamworkspaces/teamworkspaces_integration_test.go new file mode 100644 index 000000000000..ea693bad9f45 --- /dev/null +++ b/pkg/detectors/teamworkspaces/teamworkspaces_integration_test.go @@ -0,0 +1,117 @@ +//go:build detectors +// +build detectors + +package teamworkspaces + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTeamworkSpaces_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TEAMWORKSPACES") + inactiveSecret := testSecrets.MustGetField("TEAMWORKSPACES_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a teamworkspaces secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TeamworkSpaces, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a teamworkspaces secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TeamworkSpaces, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TeamworkSpaces.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TeamworkSpaces.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + s.FromData(ctx, false, data) + } + }) + } +} diff --git a/pkg/detectors/teamworkspaces/teamworkspaces_test.go b/pkg/detectors/teamworkspaces/teamworkspaces_test.go index ea693bad9f45..14b226d69a93 100644 --- a/pkg/detectors/teamworkspaces/teamworkspaces_test.go +++ b/pkg/detectors/teamworkspaces/teamworkspaces_test.go @@ -1,116 +1,90 @@ -//go:build detectors -// +build detectors - package teamworkspaces import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTeamworkSpaces_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TEAMWORKSPACES") - inactiveSecret := testSecrets.MustGetField("TEAMWORKSPACES_INACTIVE") +var ( + validPattern = "tkn.v1_O7NGF5FYyhV8NYI7vKc5ulE5pQkRKghGRW1o96i3PixggnAhs0IprhOJK2jMfSF8bCx2qaM=" + invalidPattern = "tkn.v1_O7NGF5FYyhV8NYI7vKc5ulE5pQkRKghGR?1o96i3PixggnAhs0IprhOJK2jMfSF8bCx2qaM=" + keyword = "teamworkspaces" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTeamworkSpaces_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a teamworkspaces secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TeamworkSpaces, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword teamworkspaces", + input: fmt.Sprintf("%s token = '%s '", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a teamworkspaces secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TeamworkSpaces, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s ' | '%s '", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s '", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TeamworkSpaces.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) } - got[i].Raw = nil + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TeamworkSpaces.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} + } + } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - s.FromData(ctx, false, data) + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } diff --git a/pkg/detectors/technicalanalysisapi/technicalanalysisapi_integration_test.go b/pkg/detectors/technicalanalysisapi/technicalanalysisapi_integration_test.go new file mode 100644 index 000000000000..b3ca4b3cb613 --- /dev/null +++ b/pkg/detectors/technicalanalysisapi/technicalanalysisapi_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package technicalanalysisapi + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTechnicalAnalysisApi_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TECHNICALANALYSISAPI") + inactiveSecret := testSecrets.MustGetField("TECHNICALANALYSISAPI_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a technicalanalysisapi secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TechnicalAnalysisApi, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a technicalanalysisapi secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TechnicalAnalysisApi, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TechnicalAnalysisApi.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TechnicalAnalysisApi.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/technicalanalysisapi/technicalanalysisapi_test.go b/pkg/detectors/technicalanalysisapi/technicalanalysisapi_test.go index b3ca4b3cb613..52fab107c428 100644 --- a/pkg/detectors/technicalanalysisapi/technicalanalysisapi_test.go +++ b/pkg/detectors/technicalanalysisapi/technicalanalysisapi_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package technicalanalysisapi import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTechnicalAnalysisApi_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TECHNICALANALYSISAPI") - inactiveSecret := testSecrets.MustGetField("TECHNICALANALYSISAPI_INACTIVE") +var ( + validPattern = "UNVGPP73JF6PMNU362Q6OEDI2AS6E24BMSAFVAPIXW7XI5O8" + invalidPattern = "UNVGPP73JF6PMNU36?Q6OEDI2AS6E24BMSAFVAPIXW7XI5O8" + keyword = "technicalanalysisapi" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTechnicalAnalysisApi_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a technicalanalysisapi secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TechnicalAnalysisApi, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword technicalanalysisapi", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a technicalanalysisapi secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TechnicalAnalysisApi, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TechnicalAnalysisApi.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TechnicalAnalysisApi.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/tefter/tefter_integration_test.go b/pkg/detectors/tefter/tefter_integration_test.go new file mode 100644 index 000000000000..607e8e421364 --- /dev/null +++ b/pkg/detectors/tefter/tefter_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package tefter + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTefter_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TEFTER") + inactiveSecret := testSecrets.MustGetField("TEFTER_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tefter secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tefter, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tefter secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tefter, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Tefter.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Tefter.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tefter/tefter_test.go b/pkg/detectors/tefter/tefter_test.go index 607e8e421364..f3af28633856 100644 --- a/pkg/detectors/tefter/tefter_test.go +++ b/pkg/detectors/tefter/tefter_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package tefter import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTefter_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TEFTER") - inactiveSecret := testSecrets.MustGetField("TEFTER_INACTIVE") +var ( + validPattern = "gnKLZ6PNGylpRrrJjiWe" + invalidPattern = "gnKLZ6PNGy?pRrrJjiWe" + keyword = "tefter" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTefter_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tefter secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tefter, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tefter", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tefter secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tefter, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Tefter.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Tefter.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/telegrambottoken/telegrambottoken_integration_test.go b/pkg/detectors/telegrambottoken/telegrambottoken_integration_test.go new file mode 100644 index 000000000000..bd7aab393043 --- /dev/null +++ b/pkg/detectors/telegrambottoken/telegrambottoken_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package telegrambottoken + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTelegramBotToken_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TELEGRAMBOTTOKEN_TOKEN") + inactiveSecret := testSecrets.MustGetField("TELEGRAMBOTTOKEN_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a telegrambottoken secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TelegramBotToken, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a telegrambottoken secret %s within", inactiveSecret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TelegramBotToken, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TelegramBotToken.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TelegramBotToken.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/telegrambottoken/telegrambottoken_test.go b/pkg/detectors/telegrambottoken/telegrambottoken_test.go index bd7aab393043..d500a0d5ed61 100644 --- a/pkg/detectors/telegrambottoken/telegrambottoken_test.go +++ b/pkg/detectors/telegrambottoken/telegrambottoken_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package telegrambottoken import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTelegramBotToken_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TELEGRAMBOTTOKEN_TOKEN") - inactiveSecret := testSecrets.MustGetField("TELEGRAMBOTTOKEN_INACTIVE") +var ( + validPattern = "869511423:BctOM4-oBANUkq653PAPbo4HLs3DCd9NCDe" + invalidPattern = "869511423:BctOM4-oBANU?q653PAPbo4HLs3DCd9NCDe" + keyword = "telegrambottoken" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTelegramBotToken_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a telegrambottoken secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TelegramBotToken, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword telegrambottoken", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a telegrambottoken secret %s within", inactiveSecret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TelegramBotToken, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TelegramBotToken.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TelegramBotToken.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/teletype/teletype_integration_test.go b/pkg/detectors/teletype/teletype_integration_test.go new file mode 100644 index 000000000000..c36edd0e604e --- /dev/null +++ b/pkg/detectors/teletype/teletype_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package teletype + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTeletype_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TELETYPE") + inactiveSecret := testSecrets.MustGetField("TELETYPE_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a teletype secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Teletype, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a teletype secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Teletype, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Teletype.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Teletype.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/teletype/teletype_test.go b/pkg/detectors/teletype/teletype_test.go index c36edd0e604e..bc0f26d26e68 100644 --- a/pkg/detectors/teletype/teletype_test.go +++ b/pkg/detectors/teletype/teletype_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package teletype import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTeletype_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TELETYPE") - inactiveSecret := testSecrets.MustGetField("TELETYPE_INACTIVE") +var ( + validPattern = "WdEUY8VfTBjpSpHlrhjbsdarYSVf-jXZfwYPCt1ckQLBlJ3GX-p9xs3dBor0ekHT" + invalidPattern = "WdEUY8V?TBjpSpHlrhjbsdarYSVf-jXZfwYPCt1ckQLBlJ3GX-p9xs3dBor0ekHT" + keyword = "teletype" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTeletype_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a teletype secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Teletype, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword teletype", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a teletype secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Teletype, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Teletype.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Teletype.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/telnyx/telnyx_integration_test.go b/pkg/detectors/telnyx/telnyx_integration_test.go new file mode 100644 index 000000000000..7eb3a8848482 --- /dev/null +++ b/pkg/detectors/telnyx/telnyx_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package telnyx + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTelnyx_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TELNYX") + inactiveSecret := testSecrets.MustGetField("TELNYX_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a telnyx secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Telnyx, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a telnyx secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Telnyx, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Telnyx.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Telnyx.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/telnyx/telnyx_test.go b/pkg/detectors/telnyx/telnyx_test.go index 7eb3a8848482..688d90243c83 100644 --- a/pkg/detectors/telnyx/telnyx_test.go +++ b/pkg/detectors/telnyx/telnyx_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package telnyx import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTelnyx_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TELNYX") - inactiveSecret := testSecrets.MustGetField("TELNYX_INACTIVE") +var ( + validPattern = "KEYAptrcGNPkrkfIifpxSr1PAf5ChNcx-APIMKz9_ImMMXli1w16QD97i5" + invalidPattern = "KEYAptrcG?PkrkfIifpxSr1PAf5ChNcx-APIMKz9_ImMMXli1w16QD97i5" + keyword = "telnyx" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTelnyx_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a telnyx secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Telnyx, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword telnyx", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a telnyx secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Telnyx, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Telnyx.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Telnyx.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/terraformcloudpersonaltoken/terraformcloudpersonaltoken_integration_test.go b/pkg/detectors/terraformcloudpersonaltoken/terraformcloudpersonaltoken_integration_test.go new file mode 100644 index 000000000000..2d0f54d3bb6d --- /dev/null +++ b/pkg/detectors/terraformcloudpersonaltoken/terraformcloudpersonaltoken_integration_test.go @@ -0,0 +1,143 @@ +//go:build detectors +// +build detectors + +package terraformcloudpersonaltoken + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTerraformCloudPersonalToken_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TERRAFORMCLOUDPERSONALTOKEN_TOKEN") + inactiveSecret := testSecrets.MustGetField("TERRAFORMCLOUDPERSONALTOKEN_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a terra secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TerraformCloudPersonalToken, + Verified: true, + }, + }, + wantErr: false, + }, + + { + name: "found, unverified in json", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf(`{ + "credentials": { + "app.terraform.io": { + "token": "%s" + } + } + }`, inactiveSecret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TerraformCloudPersonalToken, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a terra secret %s within but unverified", inactiveSecret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TerraformCloudPersonalToken, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TerraformCloudPersonalToken.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TerraformCloudPersonalToken.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/terraformcloudpersonaltoken/terraformcloudpersonaltoken_test.go b/pkg/detectors/terraformcloudpersonaltoken/terraformcloudpersonaltoken_test.go index 2d0f54d3bb6d..47d556c1bd60 100644 --- a/pkg/detectors/terraformcloudpersonaltoken/terraformcloudpersonaltoken_test.go +++ b/pkg/detectors/terraformcloudpersonaltoken/terraformcloudpersonaltoken_test.go @@ -1,143 +1,81 @@ -//go:build detectors -// +build detectors - package terraformcloudpersonaltoken import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTerraformCloudPersonalToken_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TERRAFORMCLOUDPERSONALTOKEN_TOKEN") - inactiveSecret := testSecrets.MustGetField("TERRAFORMCLOUDPERSONALTOKEN_INACTIVE") +var ( + validPattern = "HMu97kXVPMUVOv.atlasv1.RjI0iSluLRxMTVU9x04xUV5iwpgGkHXnLtyVgNg2v7iedttRzvjMNuvisQob0OVaDLH" + invalidPattern = "HMu97kXVPMUVOv.atlasv1.RjI0iSluLRxMTVU9x04xUV?iwpgGkHXnLtyVgNg2v7iedttRzvjMNuvisQob0OVaDLH" + keyword = "terraformcloudpersonaltoken" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTerraformCloudPersonalToken_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a terra secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TerraformCloudPersonalToken, - Verified: true, - }, - }, - wantErr: false, - }, - - { - name: "found, unverified in json", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf(`{ - "credentials": { - "app.terraform.io": { - "token": "%s" - } - } - }`, inactiveSecret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TerraformCloudPersonalToken, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - with keyword terraformcloudpersonaltoken", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a terra secret %s within but unverified", inactiveSecret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TerraformCloudPersonalToken, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TerraformCloudPersonalToken.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TerraformCloudPersonalToken.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/testingbot/testingbot_integration_test.go b/pkg/detectors/testingbot/testingbot_integration_test.go new file mode 100644 index 000000000000..a0a9b2a8a21d --- /dev/null +++ b/pkg/detectors/testingbot/testingbot_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package testingbot + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTestingBot_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TESTINGBOT") + id := testSecrets.MustGetField("TESTINGBOT_USER") + inactiveSecret := testSecrets.MustGetField("TESTINGBOT_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a testingbot secret %s within testingbot %s", secret, id)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TestingBot, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a testingbot secret %s within testingbot %s but not valid", inactiveSecret, id)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TestingBot, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TestingBot.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TestingBot.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/testingbot/testingbot_test.go b/pkg/detectors/testingbot/testingbot_test.go index a0a9b2a8a21d..102f9832f661 100644 --- a/pkg/detectors/testingbot/testingbot_test.go +++ b/pkg/detectors/testingbot/testingbot_test.go @@ -1,121 +1,83 @@ -//go:build detectors -// +build detectors - package testingbot import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTestingBot_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TESTINGBOT") - id := testSecrets.MustGetField("TESTINGBOT_USER") - inactiveSecret := testSecrets.MustGetField("TESTINGBOT_INACTIVE") +var ( + validKey = "tyg635h477e5zf1cbvk94xjwam20mjn1" + invalidKey = "tyg635h477e5zf1c?vk94xjwam20mjn1" + validId = "apelbd5muwijaybdgn920qn64xjgnsf1" + invalidId = "apelbd5muwijaybd?n920qn64xjgnsf1" + keyword = "testingbot" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTestingBot_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a testingbot secret %s within testingbot %s", secret, id)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TestingBot, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword testingbot", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, validKey, keyword, validId), + want: []string{validKey, validId, validId, validKey}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a testingbot secret %s within testingbot %s but not valid", inactiveSecret, id)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TestingBot, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, invalidKey, keyword, invalidId), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TestingBot.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TestingBot.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/textmagic/textmagic_integration_test.go b/pkg/detectors/textmagic/textmagic_integration_test.go new file mode 100644 index 000000000000..251e20b12f57 --- /dev/null +++ b/pkg/detectors/textmagic/textmagic_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package textmagic + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTextmagic_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TEXTMAGIC") + user := testSecrets.MustGetField("SCANNER_USERNAME") + inactiveSecret := testSecrets.MustGetField("TEXTMAGIC_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a textmagic secret %s within textmagic user %s", secret, user)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Textmagic, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a textmagic secret %s within textmagic user %s but not valid", inactiveSecret, user)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Textmagic, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Textmagic.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Textmagic.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/textmagic/textmagic_test.go b/pkg/detectors/textmagic/textmagic_test.go index 251e20b12f57..0fe73c100f05 100644 --- a/pkg/detectors/textmagic/textmagic_test.go +++ b/pkg/detectors/textmagic/textmagic_test.go @@ -1,121 +1,83 @@ -//go:build detectors -// +build detectors - package textmagic import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTextmagic_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TEXTMAGIC") - user := testSecrets.MustGetField("SCANNER_USERNAME") - inactiveSecret := testSecrets.MustGetField("TEXTMAGIC_INACTIVE") +var ( + validKey = "hnNIvpj0lNNChF5z5sAzsgku4CuL0u" + invalidKey = "hnNIvpj0lNNChF5?5sAzsgku4CuL0u" + validUser = "kKVO9ZYWvpcZS0Zozc" + invalidUser = "?KVO9ZYWvp?ZS0Zozc" + keyword = "textmagic" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTextmagic_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a textmagic secret %s within textmagic user %s", secret, user)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Textmagic, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword textmagic", + input: fmt.Sprintf("%s '%s'\n'%s'\n", keyword, validKey, validUser), + want: []string{validKey + validUser}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a textmagic secret %s within textmagic user %s but not valid", inactiveSecret, user)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Textmagic, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s '%s'\n'%s'\n", keyword, invalidKey, invalidUser), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Textmagic.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Textmagic.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/theoddsapi/theoddsapi_integration_test.go b/pkg/detectors/theoddsapi/theoddsapi_integration_test.go new file mode 100644 index 000000000000..d4a5aead7daf --- /dev/null +++ b/pkg/detectors/theoddsapi/theoddsapi_integration_test.go @@ -0,0 +1,117 @@ +//go:build detectors +// +build detectors + +package theoddsapi + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTheOddsApi_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("THEODDSAPI") + inactiveSecret := testSecrets.MustGetField("THEODDSAPI_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a theoddsapi secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TheOddsApi, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a theoddsapi secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TheOddsApi, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TheOddsApi.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TheOddsApi.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + s.FromData(ctx, false, data) + } + }) + } +} diff --git a/pkg/detectors/theoddsapi/theoddsapi_test.go b/pkg/detectors/theoddsapi/theoddsapi_test.go index d4a5aead7daf..28257655371b 100644 --- a/pkg/detectors/theoddsapi/theoddsapi_test.go +++ b/pkg/detectors/theoddsapi/theoddsapi_test.go @@ -1,116 +1,90 @@ -//go:build detectors -// +build detectors - package theoddsapi import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTheOddsApi_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("THEODDSAPI") - inactiveSecret := testSecrets.MustGetField("THEODDSAPI_INACTIVE") +var ( + validPattern = "237eb668c01429518426aa82b374c9da" + invalidPattern = "237eb66?c01429518426aa82b374c9da" + keyword = "theoddsapi" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTheOddsApi_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a theoddsapi secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TheOddsApi, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword theoddsapi", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a theoddsapi secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TheOddsApi, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TheOddsApi.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) } - got[i].Raw = nil + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TheOddsApi.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} + } + } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - s.FromData(ctx, false, data) + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } diff --git a/pkg/detectors/thinkific/thinkific_integration_test.go b/pkg/detectors/thinkific/thinkific_integration_test.go new file mode 100644 index 000000000000..d9445616c73f --- /dev/null +++ b/pkg/detectors/thinkific/thinkific_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package thinkific + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestThinkific_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("THINKIFIC") + inactiveSecret := testSecrets.MustGetField("THINKIFIC_INACTIVE") + domain := testSecrets.MustGetField("THINKIFIC_DOMAIN") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a thinkific secret %s within thinkificdom %s", secret, domain)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Thinkific, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a thinkific secret %s within thinkificdom %s but not valid", inactiveSecret, domain)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Thinkific, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Thinkific.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Thinkific.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/thinkific/thinkific_test.go b/pkg/detectors/thinkific/thinkific_test.go index d9445616c73f..c5105c7d09e9 100644 --- a/pkg/detectors/thinkific/thinkific_test.go +++ b/pkg/detectors/thinkific/thinkific_test.go @@ -1,121 +1,83 @@ -//go:build detectors -// +build detectors - package thinkific import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestThinkific_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("THINKIFIC") - inactiveSecret := testSecrets.MustGetField("THINKIFIC_INACTIVE") - domain := testSecrets.MustGetField("THINKIFIC_DOMAIN") +var ( + validKey = "35992abe3b6b861c66d4e1d668ac9367" + invalidKey = "35992abe3b?b861c66d4e1d668ac9367" + validDomain = "orzBcpvjg21Oj2NPayZ3cZOOfaJpS7GwXLt6pP" + invalidDomain = "?rzBcpvjg21Oj2N?ayZ3cZOOfaJpS7GwXLt6pP" + keyword = "thinkific" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestThinkific_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a thinkific secret %s within thinkificdom %s", secret, domain)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Thinkific, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword thinkific", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, validKey, keyword, validDomain), + want: []string{validKey, validKey}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a thinkific secret %s within thinkificdom %s but not valid", inactiveSecret, domain)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Thinkific, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, invalidKey, keyword, invalidDomain), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Thinkific.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Thinkific.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/thousandeyes/thousandeyes_integration_test.go b/pkg/detectors/thousandeyes/thousandeyes_integration_test.go new file mode 100644 index 000000000000..8a5ea0bead86 --- /dev/null +++ b/pkg/detectors/thousandeyes/thousandeyes_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package thousandeyes + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestThousandEyes_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("THOUSANDEYES") + email := testSecrets.MustGetField("THOUSANDEYES_USER") + inactiveSecret := testSecrets.MustGetField("THOUSANDEYES_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a thousandeyes secret %s within thousandeyes %s", secret, email)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_ThousandEyes, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a thousandeyes secret %s within thousandeyes %s but not valid", inactiveSecret, email)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_ThousandEyes, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("ThousandEyes.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("ThousandEyes.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/thousandeyes/thousandeyes_test.go b/pkg/detectors/thousandeyes/thousandeyes_test.go index 8a5ea0bead86..1a6ec8e087c6 100644 --- a/pkg/detectors/thousandeyes/thousandeyes_test.go +++ b/pkg/detectors/thousandeyes/thousandeyes_test.go @@ -1,121 +1,83 @@ -//go:build detectors -// +build detectors - package thousandeyes import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestThousandEyes_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("THOUSANDEYES") - email := testSecrets.MustGetField("THOUSANDEYES_USER") - inactiveSecret := testSecrets.MustGetField("THOUSANDEYES_INACTIVE") +var ( + validKey = "QRv5GRXQjWmQ6esvXuvk3pcw0IMAw5NO" + invalidKey = "QRv5GR?QjWmQ6esvXuvk3pcw0IMAw5NO" + validEmail = "ejtaK2kkwuVx@1U6Evvih9YeT" + invalidEmail = "ejtaK2kkwuVx?1U6Evvih9YeT" + keyword = "thousandeyes" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestThousandEyes_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a thousandeyes secret %s within thousandeyes %s", secret, email)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_ThousandEyes, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword thousandeyes", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, validKey, keyword, validEmail), + want: []string{validKey}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a thousandeyes secret %s within thousandeyes %s but not valid", inactiveSecret, email)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_ThousandEyes, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, invalidKey, keyword, invalidEmail), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("ThousandEyes.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("ThousandEyes.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/ticketmaster/ticketmaster_integration_test.go b/pkg/detectors/ticketmaster/ticketmaster_integration_test.go new file mode 100644 index 000000000000..6cdbefb91d97 --- /dev/null +++ b/pkg/detectors/ticketmaster/ticketmaster_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package ticketmaster + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTicketMaster_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TICKETMASTER") + inactiveSecret := testSecrets.MustGetField("TICKETMASTER_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a ticketmaster secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TicketMaster, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a ticketmaster secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TicketMaster, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TicketMaster.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TicketMaster.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/ticketmaster/ticketmaster_test.go b/pkg/detectors/ticketmaster/ticketmaster_test.go index 6cdbefb91d97..0458e155a078 100644 --- a/pkg/detectors/ticketmaster/ticketmaster_test.go +++ b/pkg/detectors/ticketmaster/ticketmaster_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package ticketmaster import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTicketMaster_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TICKETMASTER") - inactiveSecret := testSecrets.MustGetField("TICKETMASTER_INACTIVE") +var ( + validPattern = "QSLwTBLaOcyPYh9KW1edgeiyPxqLT1KK" + invalidPattern = "QSLwTBLaOcyPYh9K?1edgeiyPxqLT1KK" + keyword = "ticketmaster" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTicketMaster_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a ticketmaster secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TicketMaster, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword ticketmaster", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a ticketmaster secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TicketMaster, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TicketMaster.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TicketMaster.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/tickettailor/tickettailor_integration_test.go b/pkg/detectors/tickettailor/tickettailor_integration_test.go new file mode 100644 index 000000000000..c391c47d6d0e --- /dev/null +++ b/pkg/detectors/tickettailor/tickettailor_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package tickettailor + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTickettailor_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TICKETTAILOR") + inactiveSecret := testSecrets.MustGetField("TICKETTAILOR_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tickettailor secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tickettailor, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tickettailor secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tickettailor, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Tickettailor.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Tickettailor.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tickettailor/tickettailor_test.go b/pkg/detectors/tickettailor/tickettailor_test.go index c391c47d6d0e..bedd54b51f55 100644 --- a/pkg/detectors/tickettailor/tickettailor_test.go +++ b/pkg/detectors/tickettailor/tickettailor_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package tickettailor import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTickettailor_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TICKETTAILOR") - inactiveSecret := testSecrets.MustGetField("TICKETTAILOR_INACTIVE") +var ( + validPattern = "skOFK3Yf_WW6E3TND0PXT5L4LPeOfVG7cEE_CdL2Y92fWNR" + invalidPattern = "skOFK3Yf_WW6E3TND0PXT5L?LPeOfVG7cEE_CdL2Y92fWNR" + keyword = "tickettailor" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTickettailor_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tickettailor secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tickettailor, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tickettailor", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tickettailor secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tickettailor, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Tickettailor.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Tickettailor.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/tiingo/tiingo_integration_test.go b/pkg/detectors/tiingo/tiingo_integration_test.go new file mode 100644 index 000000000000..64e75d3dca19 --- /dev/null +++ b/pkg/detectors/tiingo/tiingo_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package tiingo + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTiingo_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TIINGO") + inactiveSecret := testSecrets.MustGetField("TIINGO_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tiingo secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tiingo, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tiingo secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tiingo, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Tiingo.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Tiingo.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tiingo/tiingo_test.go b/pkg/detectors/tiingo/tiingo_test.go index 64e75d3dca19..51dd57559365 100644 --- a/pkg/detectors/tiingo/tiingo_test.go +++ b/pkg/detectors/tiingo/tiingo_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package tiingo import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTiingo_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TIINGO") - inactiveSecret := testSecrets.MustGetField("TIINGO_INACTIVE") +var ( + validPattern = "r2ofhhzv4tb7mv2c7o32fd9vlusg03kc76dygnk2" + invalidPattern = "r2o?hhzv4tb7mv2c7o32fd9vlusg03kc76dygnk2" + keyword = "tiingo" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTiingo_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tiingo secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tiingo, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tiingo", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tiingo secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tiingo, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Tiingo.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Tiingo.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/timecamp/timecamp_integration_test.go b/pkg/detectors/timecamp/timecamp_integration_test.go new file mode 100644 index 000000000000..d091ae3ec4c8 --- /dev/null +++ b/pkg/detectors/timecamp/timecamp_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package timecamp + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTimeCamp_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TIMECAMP") + inactiveSecret := testSecrets.MustGetField("TIMECAMP_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a timecamp secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TimeCamp, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a timecamp secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TimeCamp, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TimeCamp.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TimeCamp.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/timecamp/timecamp_test.go b/pkg/detectors/timecamp/timecamp_test.go index d091ae3ec4c8..563037265874 100644 --- a/pkg/detectors/timecamp/timecamp_test.go +++ b/pkg/detectors/timecamp/timecamp_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package timecamp import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTimeCamp_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TIMECAMP") - inactiveSecret := testSecrets.MustGetField("TIMECAMP_INACTIVE") +var ( + validPattern = "66mtci6qdo8ccczw48j5lpt2uw" + invalidPattern = "66mt?i6qdo8ccczw48j5lpt2uw" + keyword = "timecamp" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTimeCamp_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a timecamp secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TimeCamp, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword timecamp", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a timecamp secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TimeCamp, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TimeCamp.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TimeCamp.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/timezoneapi/timezoneapi_integration_test.go b/pkg/detectors/timezoneapi/timezoneapi_integration_test.go new file mode 100644 index 000000000000..8a3accdd80b0 --- /dev/null +++ b/pkg/detectors/timezoneapi/timezoneapi_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package timezoneapi + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTimezoneapi_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TIMEZONEAPI") + inactiveSecret := testSecrets.MustGetField("TIMEZONEAPI_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a timezoneapi secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Timezoneapi, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a timezoneapi secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Timezoneapi, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Timezoneapi.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Timezoneapi.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/timezoneapi/timezoneapi_test.go b/pkg/detectors/timezoneapi/timezoneapi_test.go index 8a3accdd80b0..09993a459952 100644 --- a/pkg/detectors/timezoneapi/timezoneapi_test.go +++ b/pkg/detectors/timezoneapi/timezoneapi_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package timezoneapi import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTimezoneapi_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TIMEZONEAPI") - inactiveSecret := testSecrets.MustGetField("TIMEZONEAPI_INACTIVE") +var ( + validPattern = "SSVmFDNCFfOHKXQAEckx" + invalidPattern = "SSVmFDNCFf?HKXQAEckx" + keyword = "timezoneapi" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTimezoneapi_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a timezoneapi secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Timezoneapi, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword timezoneapi", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, + }, + { + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a timezoneapi secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Timezoneapi, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Timezoneapi.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Timezoneapi.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/tineswebhook/tineswebhook_integration_test.go b/pkg/detectors/tineswebhook/tineswebhook_integration_test.go new file mode 100644 index 000000000000..f80b9e7fb626 --- /dev/null +++ b/pkg/detectors/tineswebhook/tineswebhook_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package tineswebhook + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTinesWebhook_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TINESWEBHOOK_TOKEN") + inactiveSecret := testSecrets.MustGetField("TINESWEBHOOK_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tineswebhook secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TinesWebhook, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tineswebhook secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TinesWebhook, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TinesWebhook.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TinesWebhook.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tineswebhook/tineswebhook_test.go b/pkg/detectors/tineswebhook/tineswebhook_test.go index f80b9e7fb626..cc858d78ffa9 100644 --- a/pkg/detectors/tineswebhook/tineswebhook_test.go +++ b/pkg/detectors/tineswebhook/tineswebhook_test.go @@ -1,120 +1,81 @@ -//go:build detectors -// +build detectors - package tineswebhook import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTinesWebhook_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TINESWEBHOOK_TOKEN") - inactiveSecret := testSecrets.MustGetField("TINESWEBHOOK_INACTIVE") +var ( + validPattern = "https://efT.tines.com/webhook/4j55coiuyiladq9ydaoo4oan89u56vkm/u37txygi11dou8u2ttwe4rl63ax4lnw9" + invalidPattern = "https://efT.tines.com/webhook/4j55coiuyil?dq9ydaoo4oan89u56vkm/u37txygi11dou8u2ttwe4rl63ax4lnw9" + keyword = "tineswebhook" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTinesWebhook_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tineswebhook secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TinesWebhook, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tineswebhook", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tineswebhook secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TinesWebhook, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TinesWebhook.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TinesWebhook.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/tly/tly_integration_test.go b/pkg/detectors/tly/tly_integration_test.go new file mode 100644 index 000000000000..c151f420dcac --- /dev/null +++ b/pkg/detectors/tly/tly_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package tly + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTLy_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TLY") + inactiveSecret := testSecrets.MustGetField("TLY_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tly secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TLy, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tly secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TLy, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TLy.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TLy.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tly/tly_test.go b/pkg/detectors/tly/tly_test.go index c151f420dcac..8723aad29ba6 100644 --- a/pkg/detectors/tly/tly_test.go +++ b/pkg/detectors/tly/tly_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package tly import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTLy_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TLY") - inactiveSecret := testSecrets.MustGetField("TLY_INACTIVE") +var ( + validPattern = "VLmhHAQq7kjrhxrt7x07pJjnafVRara2aoqPndSOepXH2MOY3CcloWwG6ZcD" + invalidPattern = "VLmhHAQq7kjrhxrt7x07pJjn?fVRara2aoqPndSOepXH2MOY3CcloWwG6ZcD" + keyword = "tly" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTLy_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tly secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TLy, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tly", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, + }, + { + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tly secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TLy, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TLy.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TLy.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/tmetric/tmetric_integration_test.go b/pkg/detectors/tmetric/tmetric_integration_test.go new file mode 100644 index 000000000000..dfe3154dd306 --- /dev/null +++ b/pkg/detectors/tmetric/tmetric_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package tmetric + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTmetric_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TMETRIC") + inactiveSecret := testSecrets.MustGetField("TMETRIC_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tmetric secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tmetric, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tmetric secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tmetric, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Tmetric.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Tmetric.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tmetric/tmetric_test.go b/pkg/detectors/tmetric/tmetric_test.go index dfe3154dd306..becf3cbce4e0 100644 --- a/pkg/detectors/tmetric/tmetric_test.go +++ b/pkg/detectors/tmetric/tmetric_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package tmetric import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTmetric_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TMETRIC") - inactiveSecret := testSecrets.MustGetField("TMETRIC_INACTIVE") +var ( + validPattern = "379NU836GQGRFS7Q6MBPKY5GX8F16AF2EKSUB3X4KA2RWKZPU3I3S7R7QVJPPMG5" + invalidPattern = "379NU836GQGRFS7Q6MBPKY5GX8F16AF2?KSUB3X4KA2RWKZPU3I3S7R7QVJPPMG5" + keyword = "tmetric" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTmetric_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tmetric secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tmetric, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tmetric", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tmetric secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tmetric, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Tmetric.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Tmetric.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/todoist/todoist_integration_test.go b/pkg/detectors/todoist/todoist_integration_test.go new file mode 100644 index 000000000000..de8f825b06bc --- /dev/null +++ b/pkg/detectors/todoist/todoist_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package todoist + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTodoist_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TODOIST") + inactiveSecret := testSecrets.MustGetField("TODOIST_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a todoist secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Todoist, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a todoist secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Todoist, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Todoist.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Todoist.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/todoist/todoist_test.go b/pkg/detectors/todoist/todoist_test.go index de8f825b06bc..907b32203775 100644 --- a/pkg/detectors/todoist/todoist_test.go +++ b/pkg/detectors/todoist/todoist_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package todoist import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTodoist_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TODOIST") - inactiveSecret := testSecrets.MustGetField("TODOIST_INACTIVE") +var ( + validPattern = "qpgv7z8amkp4ln55znaacezm9jy35wcayy6bya2r" + invalidPattern = "qpgv7z8amkp4ln55znaa?ezm9jy35wcayy6bya2r" + keyword = "todoist" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTodoist_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a todoist secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Todoist, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword todoist", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a todoist secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Todoist, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Todoist.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Todoist.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/toggltrack/toggltrack_integration_test.go b/pkg/detectors/toggltrack/toggltrack_integration_test.go new file mode 100644 index 000000000000..2e21c7ef7e17 --- /dev/null +++ b/pkg/detectors/toggltrack/toggltrack_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package toggltrack + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTogglTrack_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TOGGLTRACK") + inactiveSecret := testSecrets.MustGetField("TOGGLTRACK_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a toggltrack secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TogglTrack, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a toggltrack secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TogglTrack, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TogglTrack.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TogglTrack.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/toggltrack/toggltrack_test.go b/pkg/detectors/toggltrack/toggltrack_test.go index 2e21c7ef7e17..9725da4a71b8 100644 --- a/pkg/detectors/toggltrack/toggltrack_test.go +++ b/pkg/detectors/toggltrack/toggltrack_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package toggltrack import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTogglTrack_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TOGGLTRACK") - inactiveSecret := testSecrets.MustGetField("TOGGLTRACK_INACTIVE") +var ( + validPattern = "5hj12f7pnz8ha0rqxewpqvsuc8hzgy9h" + invalidPattern = "5hj12f7pnz8ha0rq?ewpqvsuc8hzgy9h" + keyword = "toggltrack" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTogglTrack_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a toggltrack secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TogglTrack, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword toggltrack", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a toggltrack secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TogglTrack, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TogglTrack.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TogglTrack.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/tokeet/tokeet_integration_test.go b/pkg/detectors/tokeet/tokeet_integration_test.go new file mode 100644 index 000000000000..0b8ce70f3b40 --- /dev/null +++ b/pkg/detectors/tokeet/tokeet_integration_test.go @@ -0,0 +1,118 @@ +package tokeet + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTokeet_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors4") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TOKEET") + inactiveSecret := testSecrets.MustGetField("TOKEET_INACTIVE") + id := testSecrets.MustGetField("TOKEET_ID") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tokeet secret %s within tokeet %s", secret, id)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tokeet, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tokeet secret %s within but not valid tokeet %s", inactiveSecret, id)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tokeet, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Tokeet.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Tokeet.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tokeet/tokeet_test.go b/pkg/detectors/tokeet/tokeet_test.go index 0b8ce70f3b40..bcf05a734272 100644 --- a/pkg/detectors/tokeet/tokeet_test.go +++ b/pkg/detectors/tokeet/tokeet_test.go @@ -4,115 +4,80 @@ import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTokeet_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors4") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TOKEET") - inactiveSecret := testSecrets.MustGetField("TOKEET_INACTIVE") - id := testSecrets.MustGetField("TOKEET_ID") +var ( + validKey = "d57fe072-befa-4df5-42f1-014f9d253699" + invalidKey = "d57fe072?befa-4df5-42f1-014f9d253699" + validId = "5573995420@7486" + invalidId = "5573995?20@7486" + keyword = "tokeet" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTokeet_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tokeet secret %s within tokeet %s", secret, id)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tokeet, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tokeet", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, validKey, keyword, validId), + want: []string{validKey}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tokeet secret %s within but not valid tokeet %s", inactiveSecret, id)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tokeet, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, invalidKey, keyword, invalidId), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Tokeet.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Tokeet.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/tomorrowio/tomorrowio_integration_test.go b/pkg/detectors/tomorrowio/tomorrowio_integration_test.go new file mode 100644 index 000000000000..4e76e8475daf --- /dev/null +++ b/pkg/detectors/tomorrowio/tomorrowio_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package tomorrowio + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTomorrowIO_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TOMORROWIO") + inactiveSecret := testSecrets.MustGetField("TOMORROWIO_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tomorrowio secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TomorrowIO, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tomorrowio secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TomorrowIO, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TomorrowIO.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TomorrowIO.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tomorrowio/tomorrowio_test.go b/pkg/detectors/tomorrowio/tomorrowio_test.go index 4e76e8475daf..6e5a0092ef75 100644 --- a/pkg/detectors/tomorrowio/tomorrowio_test.go +++ b/pkg/detectors/tomorrowio/tomorrowio_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package tomorrowio import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTomorrowIO_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TOMORROWIO") - inactiveSecret := testSecrets.MustGetField("TOMORROWIO_INACTIVE") +var ( + validPattern = "Ha1FtSsu5YhtFJixA3j5cIzgooDbVzz1" + invalidPattern = "Ha1FtSsu5YhtFJix?3j5cIzgooDbVzz1" + keyword = "tomorrowio" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTomorrowIO_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tomorrowio secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TomorrowIO, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tomorrowio", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tomorrowio secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TomorrowIO, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TomorrowIO.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TomorrowIO.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/tomtom/tomtom_integration_test.go b/pkg/detectors/tomtom/tomtom_integration_test.go new file mode 100644 index 000000000000..2a5c854124a2 --- /dev/null +++ b/pkg/detectors/tomtom/tomtom_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package tomtom + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTomtom_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TOMTOM") + inactiveSecret := testSecrets.MustGetField("TOMTOM_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tomtom secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tomtom, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tomtom secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tomtom, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Tomtom.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Tomtom.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tomtom/tomtom_test.go b/pkg/detectors/tomtom/tomtom_test.go index 2a5c854124a2..262509262332 100644 --- a/pkg/detectors/tomtom/tomtom_test.go +++ b/pkg/detectors/tomtom/tomtom_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package tomtom import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTomtom_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TOMTOM") - inactiveSecret := testSecrets.MustGetField("TOMTOM_INACTIVE") +var ( + validPattern = "M9g89QjhYpjXUjMwHEUNLz1C9t58osdF" + invalidPattern = "M9g89QjhYpjXUjMw?EUNLz1C9t58osdF" + keyword = "tomtom" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTomtom_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tomtom secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tomtom, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tomtom", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tomtom secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tomtom, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Tomtom.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Tomtom.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/tradier/tradier_integration_test.go b/pkg/detectors/tradier/tradier_integration_test.go new file mode 100644 index 000000000000..5c5842f7aa57 --- /dev/null +++ b/pkg/detectors/tradier/tradier_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package tradier + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTradier_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TRADIER") + inactiveSecret := testSecrets.MustGetField("TRADIER_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tradier secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tradier, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tradier secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tradier, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Tradier.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Tradier.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tradier/tradier_test.go b/pkg/detectors/tradier/tradier_test.go index 5c5842f7aa57..e1331147ef6d 100644 --- a/pkg/detectors/tradier/tradier_test.go +++ b/pkg/detectors/tradier/tradier_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package tradier import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTradier_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TRADIER") - inactiveSecret := testSecrets.MustGetField("TRADIER_INACTIVE") +var ( + validPattern = "RgH1pJ0XZKv9jh5UXHP1qGDsAxaw" + invalidPattern = "RgH1pJ0XZKv9jh?UXHP1qGDsAxaw" + keyword = "tradier" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTradier_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tradier secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tradier, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tradier", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, + }, + { + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tradier secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tradier, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Tradier.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Tradier.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/transferwise/transferwise_integration_test.go b/pkg/detectors/transferwise/transferwise_integration_test.go new file mode 100644 index 000000000000..4c0cb0a9e056 --- /dev/null +++ b/pkg/detectors/transferwise/transferwise_integration_test.go @@ -0,0 +1,117 @@ +package transferwise + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTransferwise_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TRANSFERWISE") + inactiveSecret := testSecrets.MustGetField("TRANSFERWISE_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a transferwise secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Transferwise, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a transferwise secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Transferwise, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Transferwise.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Transferwise.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/transferwise/transferwise_test.go b/pkg/detectors/transferwise/transferwise_test.go index 4c0cb0a9e056..a7681225ebf6 100644 --- a/pkg/detectors/transferwise/transferwise_test.go +++ b/pkg/detectors/transferwise/transferwise_test.go @@ -4,114 +4,88 @@ import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTransferwise_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TRANSFERWISE") - inactiveSecret := testSecrets.MustGetField("TRANSFERWISE_INACTIVE") +var ( + validPattern = "e99848c6-aeb1-e339-935d-fa7e0d0-4160" + invalidPattern = "e99848c6?aeb1-e339-935d-fa7e0d0-4160" + keyword = "transferwise" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTransferwise_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a transferwise secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Transferwise, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword transferwise", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a transferwise secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Transferwise, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Transferwise.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Transferwise.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/travelpayouts/travelpayouts_integration_test.go b/pkg/detectors/travelpayouts/travelpayouts_integration_test.go new file mode 100644 index 000000000000..3d057f1986b0 --- /dev/null +++ b/pkg/detectors/travelpayouts/travelpayouts_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package travelpayouts + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTravelPayouts_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TRAVELPAYOUTS") + inactiveSecret := testSecrets.MustGetField("TRAVELPAYOUTS_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a travelpayouts secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TravelPayouts, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a travelpayouts secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TravelPayouts, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TravelPayouts.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TravelPayouts.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/travelpayouts/travelpayouts_test.go b/pkg/detectors/travelpayouts/travelpayouts_test.go index 3d057f1986b0..871ffdb8d99e 100644 --- a/pkg/detectors/travelpayouts/travelpayouts_test.go +++ b/pkg/detectors/travelpayouts/travelpayouts_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package travelpayouts import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTravelPayouts_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TRAVELPAYOUTS") - inactiveSecret := testSecrets.MustGetField("TRAVELPAYOUTS_INACTIVE") +var ( + validPattern = "au3cjfqsfb1wtolub0py5cayxliubnxg" + invalidPattern = "au3cjfqsf?1wtolub0py5cayxliubnxg" + keyword = "travelpayouts" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTravelPayouts_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a travelpayouts secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TravelPayouts, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword travelpayouts", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a travelpayouts secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TravelPayouts, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TravelPayouts.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TravelPayouts.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/travisci/travisci_integration_test.go b/pkg/detectors/travisci/travisci_integration_test.go new file mode 100644 index 000000000000..77e9fc0bd320 --- /dev/null +++ b/pkg/detectors/travisci/travisci_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package travisci + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTravisCI_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TRAVISCI_TOKEN") + inactiveSecret := testSecrets.MustGetField("TRAVISCI_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a travisci secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TravisCI, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a travisci secret %s within but unverified", inactiveSecret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TravisCI, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TravisCI.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TravisCI.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/travisci/travisci_test.go b/pkg/detectors/travisci/travisci_test.go index 77e9fc0bd320..1c140576904b 100644 --- a/pkg/detectors/travisci/travisci_test.go +++ b/pkg/detectors/travisci/travisci_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package travisci import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTravisCI_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TRAVISCI_TOKEN") - inactiveSecret := testSecrets.MustGetField("TRAVISCI_INACTIVE") +var ( + validPattern = "M1BeOsJ75okEpkQUqupx0Q" + invalidPattern = "M1BeOsJ75ok?pkQUqupx0Q" + keyword = "travisci" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTravisCI_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a travisci secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TravisCI, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword travisci", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a travisci secret %s within but unverified", inactiveSecret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TravisCI, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TravisCI.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TravisCI.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/trelloapikey/trelloapikey_integration_test.go b/pkg/detectors/trelloapikey/trelloapikey_integration_test.go new file mode 100644 index 000000000000..b2aff40aafd0 --- /dev/null +++ b/pkg/detectors/trelloapikey/trelloapikey_integration_test.go @@ -0,0 +1,123 @@ +//go:build detectors +// +build detectors + +package trelloapikey + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTrelloApiKey_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + token := testSecrets.MustGetField("TRELLOAPIKEY_TOKEN") + apiKey := testSecrets.MustGetField("TRELLO_API_KEY") + inactiveApiKey := testSecrets.MustGetField("TRELLO_API_KEY_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a trello secret within https://api.trello.com/1/members/me?key=%s&token=%s", apiKey, token)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TrelloApiKey, + Redacted: apiKey, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a trello secret within https://api.trello.com/1/members/me?key=%s&token=%s but unverified", inactiveApiKey, token)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TrelloApiKey, + Redacted: "6abe2cb200b2edy1666avq5325476dfp", + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TrelloApiKey.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TrelloApiKey.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/trelloapikey/trelloapikey_test.go b/pkg/detectors/trelloapikey/trelloapikey_test.go index b2aff40aafd0..05327475cfdf 100644 --- a/pkg/detectors/trelloapikey/trelloapikey_test.go +++ b/pkg/detectors/trelloapikey/trelloapikey_test.go @@ -1,123 +1,83 @@ -//go:build detectors -// +build detectors - package trelloapikey import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTrelloApiKey_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - token := testSecrets.MustGetField("TRELLOAPIKEY_TOKEN") - apiKey := testSecrets.MustGetField("TRELLO_API_KEY") - inactiveApiKey := testSecrets.MustGetField("TRELLO_API_KEY_INACTIVE") +var ( + validToken = "Zqpox-FOtGtvV58NCfjox3y2KROmjsbvAUW2jLSL9XfPFs3P98BwmjzMKJA1KVPa" + invalidToken = "Zqpox-FOtGtvV58NCfjox3y2KROmjsbv?UW2jLSL9XfPFs3P98BwmjzMKJA1KVPa" + validKey = "NKVmEKtpJvQXU95OfApuFxCOmaRLcRsc" + invalidKey = "NKVmEKtpJvQXU95O?ApuFxCOmaRLcRsc" + keyword = "trelloapikey" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTrelloApiKey_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a trello secret within https://api.trello.com/1/members/me?key=%s&token=%s", apiKey, token)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TrelloApiKey, - Redacted: apiKey, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword trelloapikey", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, validToken, keyword, validKey), + want: []string{validKey}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a trello secret within https://api.trello.com/1/members/me?key=%s&token=%s but unverified", inactiveApiKey, token)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TrelloApiKey, - Redacted: "6abe2cb200b2edy1666avq5325476dfp", - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, invalidToken, keyword, invalidKey), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TrelloApiKey.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TrelloApiKey.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/tru/tru_integration_test.go b/pkg/detectors/tru/tru_integration_test.go new file mode 100644 index 000000000000..17d56acf6bee --- /dev/null +++ b/pkg/detectors/tru/tru_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package tru + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTru_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + id := testSecrets.MustGetField("TRU_ID") + secret := testSecrets.MustGetField("TRU_SECRET") + inactiveID := testSecrets.MustGetField("TRU_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tru secret %s within tru id %s", secret, id)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tru, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tru secret %s within tru id %s but not valid", secret, inactiveID)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tru, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Tru.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Tru.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tru/tru_test.go b/pkg/detectors/tru/tru_test.go index 17d56acf6bee..1cfa21e506ec 100644 --- a/pkg/detectors/tru/tru_test.go +++ b/pkg/detectors/tru/tru_test.go @@ -1,121 +1,83 @@ -//go:build detectors -// +build detectors - package tru import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTru_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - id := testSecrets.MustGetField("TRU_ID") - secret := testSecrets.MustGetField("TRU_SECRET") - inactiveID := testSecrets.MustGetField("TRU_INACTIVE") +var ( + validKey = "o23j7m2a-23ux-0hnl-0k57-4ph2omvcanxg" + invalidKey = "o23j7m2a?23ux-0hnl-0k57-4ph2omvcanxg" + validSecret = "PJ]IEWcY9HOIqDGfUqlFGRL1En" + invalidSecret = "PJ]IEWcY9HOIq!GfUqlFGRL1En" + keyword = "tru" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTru_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tru secret %s within tru id %s", secret, id)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tru, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tru", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, validKey, keyword, validSecret), + want: []string{validKey + validSecret}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tru secret %s within tru id %s but not valid", secret, inactiveID)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tru, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, invalidKey, keyword, invalidSecret), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Tru.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Tru.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/trufflehogenterprise/trufflehogenterprise_integration_test.go b/pkg/detectors/trufflehogenterprise/trufflehogenterprise_integration_test.go new file mode 100644 index 000000000000..56e10f2b9416 --- /dev/null +++ b/pkg/detectors/trufflehogenterprise/trufflehogenterprise_integration_test.go @@ -0,0 +1,122 @@ +//go:build detectors +// +build detectors + +package trufflehogenterprise + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTrufflehogenterprise_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + host := testSecrets.MustGetField("THOG_HOSTNAME") + key := testSecrets.MustGetField("THOG_WEB_KEY") + secret := testSecrets.MustGetField("THOG_WEB_SECRET") + inactiveSecret := testSecrets.MustGetField("THOG_WEB_INACTIVE_SECRET") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a thog secret %s for %s with key %s within", secret, host, key)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TrufflehogEnterprise, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a trufflehogenterprise secret %s for %s with key %s within but not valid", inactiveSecret, host, key)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TrufflehogEnterprise, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Trufflehogenterprise.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Trufflehogenterprise.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/trufflehogenterprise/trufflehogenterprise_test.go b/pkg/detectors/trufflehogenterprise/trufflehogenterprise_test.go index 56e10f2b9416..0e9d64db72ed 100644 --- a/pkg/detectors/trufflehogenterprise/trufflehogenterprise_test.go +++ b/pkg/detectors/trufflehogenterprise/trufflehogenterprise_test.go @@ -1,122 +1,85 @@ -//go:build detectors -// +build detectors - package trufflehogenterprise import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTrufflehogenterprise_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - host := testSecrets.MustGetField("THOG_HOSTNAME") - key := testSecrets.MustGetField("THOG_WEB_KEY") - secret := testSecrets.MustGetField("THOG_WEB_SECRET") - inactiveSecret := testSecrets.MustGetField("THOG_WEB_INACTIVE_SECRET") +var ( + validKey = "thog-key-946c7e0fbee2baff" + invalidKey = "thog-key-946?7e0fbee2baff" + validSecret = "thog-secret-8fbfc135085421e62d8f1982af17bbf6" + invalidSecret = "thog-secret-8fbfc13?085421e62d8f1982af17bbf6" + validHostname = "uryfanaextzftqnwkordjwtrascqbihyctwttsntssxgbmgtnghmaiossoiablwqntsnudfdz-grthynfwdntsfdyuvk-tqfecqndhkmebecezcyzptxnsgprzkdcwzwnzdxpm.v6.zfjkrzjmutvvwwqftipvtkdwg.trufflehog.org" + invalidHostname = "?ryfanaextzftqnwkordjwtrascqbihyctwttsntssxgbmgtnghmaiossoiablwqntsnudfdz-grthynfwdntsfdyuvk-tqfecqndhkmebecezcyzptxnsgprzkdcwzwnzdxpm.v6.zfjkrzjmutvvwwqftipvtkdwg.trufflehog.org" + keyword = "trufflehogenterprise" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTrufflehogenterprise_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a thog secret %s for %s with key %s within", secret, host, key)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TrufflehogEnterprise, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword trufflehogenterprise", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n%s token - '%s'\n", keyword, validKey, keyword, validSecret, keyword, validHostname), + want: []string{validKey}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a trufflehogenterprise secret %s for %s with key %s within but not valid", inactiveSecret, host, key)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TrufflehogEnterprise, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n%s token - '%s'\n", keyword, invalidKey, keyword, invalidSecret, keyword, invalidHostname), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Trufflehogenterprise.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Trufflehogenterprise.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/twelvedata/twelvedata_integration_test.go b/pkg/detectors/twelvedata/twelvedata_integration_test.go new file mode 100644 index 000000000000..f46b777a429d --- /dev/null +++ b/pkg/detectors/twelvedata/twelvedata_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package twelvedata + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTwelveData_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TWELVEDATA_TOKEN") + inactiveSecret := testSecrets.MustGetField("TWELVEDATA_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twelvedata secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TwelveData, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twelvedata secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TwelveData, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("TwelveData.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("TwelveData.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/twelvedata/twelvedata_test.go b/pkg/detectors/twelvedata/twelvedata_test.go index f46b777a429d..32479732b097 100644 --- a/pkg/detectors/twelvedata/twelvedata_test.go +++ b/pkg/detectors/twelvedata/twelvedata_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package twelvedata import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTwelveData_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TWELVEDATA_TOKEN") - inactiveSecret := testSecrets.MustGetField("TWELVEDATA_INACTIVE") +var ( + validPattern = "8qevq8517nx58ja9e7jrhe4uom3qbo3v" + invalidPattern = "8q?vq8517nx58ja9e7jrhe4uom3qbo3v" + keyword = "twelvedata" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTwelveData_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twelvedata secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TwelveData, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword twelvedata", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twelvedata secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TwelveData, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TwelveData.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("TwelveData.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/twilio/twilio_integration_test.go b/pkg/detectors/twilio/twilio_integration_test.go new file mode 100644 index 000000000000..246fd7fcd9fa --- /dev/null +++ b/pkg/detectors/twilio/twilio_integration_test.go @@ -0,0 +1,181 @@ +//go:build detectors +// +build detectors + +package twilio + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTwilio_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors4") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TWILLIO_API") + secretInactive := testSecrets.MustGetField("TWILLIO_API_INACTIVE") + id := testSecrets.MustGetField("TWILLIO_ID") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + wantVerificationErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twillio secret %s within awsId %s", secret, id)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twilio, + Verified: true, + Redacted: id, + RawV2: []byte(id + secret), + ExtraData: map[string]string{ + "account_sid": "ACa5b6165773490f33f226d71e7ffacff5", + "friendly_name": "MyServiceName", + "rotation_guide": "https://howtorotate.com/docs/tutorials/twilio/", + }, + }, + }, + wantErr: false, + }, + { + name: "found, not verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twillio secret %s within awsId %s", secretInactive, id)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twilio, + Verified: false, + Redacted: id, + RawV2: []byte(id + secretInactive), + ExtraData: map[string]string{ + "rotation_guide": "https://howtorotate.com/docs/tutorials/twilio/", + }, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + { + name: "found, would be verified if not for timeout", + s: Scanner{client: common.SaneHttpClientTimeOut(100 * time.Microsecond)}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twillio secret %s within awsId %s", secret, id)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twilio, + Verified: false, + Redacted: id, + RawV2: []byte(id + secret), + ExtraData: map[string]string{ + "rotation_guide": "https://howtorotate.com/docs/tutorials/twilio/", + }, + }, + }, + wantErr: false, + wantVerificationErr: true, + }, + { + name: "found, verified but unexpected api surface", + s: Scanner{client: common.ConstantResponseHttpClient(404, "")}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twillio secret %s within awsId %s", secret, id)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twilio, + Verified: false, + Redacted: id, + RawV2: []byte(id + secret), + ExtraData: map[string]string{ + "rotation_guide": "https://howtorotate.com/docs/tutorials/twilio/", + }, + }, + }, + wantErr: false, + wantVerificationErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Twilio.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + if (got[i].VerificationError() != nil) != tt.wantVerificationErr { + t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError()) + } + } + ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError", "AnalysisInfo") + if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { + t.Errorf("Twilio.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/twilio/twilio_test.go b/pkg/detectors/twilio/twilio_test.go index 246fd7fcd9fa..a7f30df60f67 100644 --- a/pkg/detectors/twilio/twilio_test.go +++ b/pkg/detectors/twilio/twilio_test.go @@ -1,180 +1,82 @@ -//go:build detectors -// +build detectors - package twilio import ( "context" "fmt" "testing" - "time" "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTwilio_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors4") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TWILLIO_API") - secretInactive := testSecrets.MustGetField("TWILLIO_API_INACTIVE") - id := testSecrets.MustGetField("TWILLIO_ID") +var ( + validSid = "AC1b3f0bddbb6887d68d8454e66c749c6a" + invalidSid = "AC1b3f0bddbb?887d68d8454e66c749c6a" + validKey = "daf7b3d34b9787f1212316eea62ba186" + invalidKey = "daf7b3d34b9787f1?12316eea62ba186" + keyword = "twilio" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTwilio_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool - wantVerificationErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twillio secret %s within awsId %s", secret, id)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twilio, - Verified: true, - Redacted: id, - RawV2: []byte(id + secret), - ExtraData: map[string]string{ - "account_sid": "ACa5b6165773490f33f226d71e7ffacff5", - "friendly_name": "MyServiceName", - "rotation_guide": "https://howtorotate.com/docs/tutorials/twilio/", - }, - }, - }, - wantErr: false, - }, - { - name: "found, not verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twillio secret %s within awsId %s", secretInactive, id)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twilio, - Verified: false, - Redacted: id, - RawV2: []byte(id + secretInactive), - ExtraData: map[string]string{ - "rotation_guide": "https://howtorotate.com/docs/tutorials/twilio/", - }, - }, - }, - wantErr: false, + name: "valid pattern - with keyword twilio", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, validSid, keyword, validKey), + want: []string{validSid + validKey}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, - }, - { - name: "found, would be verified if not for timeout", - s: Scanner{client: common.SaneHttpClientTimeOut(100 * time.Microsecond)}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twillio secret %s within awsId %s", secret, id)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twilio, - Verified: false, - Redacted: id, - RawV2: []byte(id + secret), - ExtraData: map[string]string{ - "rotation_guide": "https://howtorotate.com/docs/tutorials/twilio/", - }, - }, - }, - wantErr: false, - wantVerificationErr: true, - }, - { - name: "found, verified but unexpected api surface", - s: Scanner{client: common.ConstantResponseHttpClient(404, "")}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twillio secret %s within awsId %s", secret, id)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twilio, - Verified: false, - Redacted: id, - RawV2: []byte(id + secret), - ExtraData: map[string]string{ - "rotation_guide": "https://howtorotate.com/docs/tutorials/twilio/", - }, - }, - }, - wantErr: false, - wantVerificationErr: true, + name: "invalid pattern", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, invalidSid, keyword, invalidKey), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Twilio.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return + } + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) } - if (got[i].VerificationError() != nil) != tt.wantVerificationErr { - t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError()) + return + } + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } - ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError", "AnalysisInfo") - if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { - t.Errorf("Twilio.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) - } + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } diff --git a/pkg/detectors/twist/twist_integration_test.go b/pkg/detectors/twist/twist_integration_test.go new file mode 100644 index 000000000000..88027608c823 --- /dev/null +++ b/pkg/detectors/twist/twist_integration_test.go @@ -0,0 +1,117 @@ +package twist + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTwist_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TWIST") + inactiveSecret := testSecrets.MustGetField("TWIST_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twist secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twist, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twist secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twist, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Twist.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Twist.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/twist/twist_test.go b/pkg/detectors/twist/twist_test.go index 88027608c823..d09c71ded091 100644 --- a/pkg/detectors/twist/twist_test.go +++ b/pkg/detectors/twist/twist_test.go @@ -4,114 +4,89 @@ import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTwist_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TWIST") - inactiveSecret := testSecrets.MustGetField("TWIST_INACTIVE") +var ( + prefix = "oauth2:" + validPattern = "dba06b506389c94f2bfc010fa83690cf7171dd40" + invalidPattern = "dga06b506389c94f2bfc010fa83690cf7171dd40" + keyword = "twist" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTwist_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twist secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twist, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword twist", + input: fmt.Sprintf("%s token = '%s%s'", keyword, prefix, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twist secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twist, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s%s' | '%s%s'", keyword, prefix, validPattern, prefix, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s%s'", keyword, prefix, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s%s'", keyword, prefix, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Twist.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Twist.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/twitch/twitch_integration_test.go b/pkg/detectors/twitch/twitch_integration_test.go new file mode 100644 index 000000000000..a708e037ad04 --- /dev/null +++ b/pkg/detectors/twitch/twitch_integration_test.go @@ -0,0 +1,210 @@ +//go:build detectors +// +build detectors + +package twitch + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTwitch_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TWITCH") + id := testSecrets.MustGetField("TWITCH_ID") + inactiveSecret := testSecrets.MustGetField("TWITCH_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + wantVerificationErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twitch secret %s within twitch %s", secret, id)), + verify: true, + }, + // the detector will try every combination of the secret and id for + // client_id and client_secret, so we expect 4 results + // but only 1 of them will be verified + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: true, + }, + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "found, would be verified if not for timeout", + s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twitch secret %s within twitch %s", secret, id)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: true, + }, + { + name: "found, verified but unexpected api surface", + s: Scanner{client: common.ConstantResponseHttpClient(404, "")}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twitch secret %s within twitch %s", secret, id)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: true, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twitch secret %s within twitch %s but not valid", inactiveSecret, id)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + { + DetectorType: detectorspb.DetectorType_Twitch, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Twitch.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + if (got[i].VerificationError() != nil) != tt.wantVerificationErr { + t.Errorf("Twitch.FromData() verificationError = %v, wantVerificationErr %v", got[i].VerificationError(), tt.wantVerificationErr) + } + } + ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError") + if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { + t.Errorf("Twitch.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/twitch/twitch_test.go b/pkg/detectors/twitch/twitch_test.go index a708e037ad04..d7c144b6fe3e 100644 --- a/pkg/detectors/twitch/twitch_test.go +++ b/pkg/detectors/twitch/twitch_test.go @@ -1,209 +1,82 @@ -//go:build detectors -// +build detectors - package twitch import ( "context" "fmt" "testing" - "time" "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTwitch_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TWITCH") - id := testSecrets.MustGetField("TWITCH_ID") - inactiveSecret := testSecrets.MustGetField("TWITCH_INACTIVE") +var ( + validKey = "k0lj2mwl8n3ztmrivlpbc7obk914sj" + invalidKey = "k0lj2mwl8n3ztmr?vlpbc7obk914sj" + validId = "kn64qw9jt39bhni04h2k5jc7ebefyn" + invalidId = "kn64qw9jt39bhni?4h2k5jc7ebefyn" + keyword = "twitch" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTwitch_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool - wantVerificationErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twitch secret %s within twitch %s", secret, id)), - verify: true, - }, - // the detector will try every combination of the secret and id for - // client_id and client_secret, so we expect 4 results - // but only 1 of them will be verified - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: true, - }, - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "found, would be verified if not for timeout", - s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twitch secret %s within twitch %s", secret, id)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - }, - wantErr: false, - wantVerificationErr: true, - }, - { - name: "found, verified but unexpected api surface", - s: Scanner{client: common.ConstantResponseHttpClient(404, "")}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twitch secret %s within twitch %s", secret, id)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - }, - wantErr: false, - wantVerificationErr: true, - }, - { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twitch secret %s within twitch %s but not valid", inactiveSecret, id)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - { - DetectorType: detectorspb.DetectorType_Twitch, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - with keyword twitch", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, validKey, keyword, validId), + want: []string{validKey, validId, validId, validKey}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, invalidKey, keyword, invalidId), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Twitch.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) } - if (got[i].VerificationError() != nil) != tt.wantVerificationErr { - t.Errorf("Twitch.FromData() verificationError = %v, wantVerificationErr %v", got[i].VerificationError(), tt.wantVerificationErr) + return + } + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } - ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError") - if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { - t.Errorf("Twitch.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) - } + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } diff --git a/pkg/detectors/twitter/v1/twitter_v1_integration_test.go b/pkg/detectors/twitter/v1/twitter_v1_integration_test.go new file mode 100644 index 000000000000..b65178082991 --- /dev/null +++ b/pkg/detectors/twitter/v1/twitter_v1_integration_test.go @@ -0,0 +1,126 @@ +//go:build detectors +// +build detectors + +package twitter + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTwitter_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TWITTER") + inactiveSecret := testSecrets.MustGetField("TWITTER_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twitter secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twitter, + Verified: true, + ExtraData: map[string]string{ + "version": "1", + }, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twitter secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twitter, + Verified: false, + ExtraData: map[string]string{ + "version": "1", + }, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Twitter.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Twitter.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/twitter/v1/twitter_v1_test.go b/pkg/detectors/twitter/v1/twitter_v1_test.go index b65178082991..4facd32740d1 100644 --- a/pkg/detectors/twitter/v1/twitter_v1_test.go +++ b/pkg/detectors/twitter/v1/twitter_v1_test.go @@ -1,126 +1,91 @@ -//go:build detectors -// +build detectors - package twitter import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTwitter_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TWITTER") - inactiveSecret := testSecrets.MustGetField("TWITTER_INACTIVE") +var ( + validPattern = "EGGIFBZPKVTIKCEJCSRHQY%5PabdkLS20yFOCKlOiXqbTG%PHGkqp%Ybm%2YgUAvoI-%3ZXVX8KgdsYSJ1DEgoxhGkoY6cNMN8EZidvGnJQ4Z8-4V2fSbdMU" + invalidPattern = "EGGIFBZPKVTIKCEJCSRHQY%5PabdkLS20yFOCKlOiXqbTG%PHGkqp%Ybm%2Y?UAvoI-%3ZXVX8KgdsYSJ1DEgoxhGkoY6cNMN8EZidvGnJQ4Z8-4V2fSbdMU" + keyword = "twitter" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTwitter_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twitter secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twitter, - Verified: true, - ExtraData: map[string]string{ - "version": "1", - }, - }, - }, - wantErr: false, + name: "valid pattern - with keyword twitter", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twitter secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twitter, - Verified: false, - ExtraData: map[string]string{ - "version": "1", - }, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Twitter.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Twitter.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/twitter/v2/twitter_v2_integration_test.go b/pkg/detectors/twitter/v2/twitter_v2_integration_test.go new file mode 100644 index 000000000000..aac6f391c84f --- /dev/null +++ b/pkg/detectors/twitter/v2/twitter_v2_integration_test.go @@ -0,0 +1,126 @@ +//go:build detectors +// +build detectors + +package twitter + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTwitter_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TWITTER_V2_ACTIVE") + inactiveSecret := testSecrets.MustGetField("TWITTER_V2_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twitter secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twitter, + Verified: true, + ExtraData: map[string]string{ + "version": "2", + }, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twitter secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Twitter, + Verified: false, + ExtraData: map[string]string{ + "version": "2", + }, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Twitter.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Twitter.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/twitter/v2/twitter_v2_test.go b/pkg/detectors/twitter/v2/twitter_v2_test.go index aac6f391c84f..64072786932e 100644 --- a/pkg/detectors/twitter/v2/twitter_v2_test.go +++ b/pkg/detectors/twitter/v2/twitter_v2_test.go @@ -1,126 +1,91 @@ -//go:build detectors -// +build detectors - package twitter import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTwitter_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TWITTER_V2_ACTIVE") - inactiveSecret := testSecrets.MustGetField("TWITTER_V2_INACTIVE") +var ( + validPattern = "6pvFJmFvkNjdMImKVWF1NOZ%9n5K8rIH4JOfczZSg5uAsHErlQY6c5TrAygcZYF9WttO1m4LGhg0IUbFgXcaXtAmH4SnIb5cHac0sNcQHVKXZgaKxMqw1Fvt87RcyYKJ1bSP0BGPE%p912wZKbAkTcRfglLMyrcV6aMnuxbj7ctFoulIgFw8Aljzv87cs4" + invalidPattern = "6pvFJmFvkNjdMImKVWF1NOZ%9n5K8rIH4JOf?zZSg5uAsHErlQY6c5TrAygcZYF9WttO1m4LGhg0IUbFgXcaXtAmH4SnIb5cHac0sNcQHVKXZgaKxMqw1Fvt87RcyYKJ1bSP0BGPE%p912wZKbAkTcRfglLMyrcV6aMnuxbj7ctFoulIgFw8Aljzv87cs4" + keyword = "twitter" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTwitter_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twitter secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twitter, - Verified: true, - ExtraData: map[string]string{ - "version": "2", - }, - }, - }, - wantErr: false, + name: "valid pattern - with keyword twitter", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twitter secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Twitter, - Verified: false, - ExtraData: map[string]string{ - "version": "2", - }, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Twitter.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Twitter.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/twitterconsumerkey/twitterconsumerkey_integration_test.go b/pkg/detectors/twitterconsumerkey/twitterconsumerkey_integration_test.go new file mode 100644 index 000000000000..8ba404b35062 --- /dev/null +++ b/pkg/detectors/twitterconsumerkey/twitterconsumerkey_integration_test.go @@ -0,0 +1,139 @@ +//go:build detectors +// +build detectors + +package twitterconsumerkey + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTwitterConsumerKey_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + key := testSecrets.MustGetField("TWITTER_CONSUMER_KEY") + secret := testSecrets.MustGetField("TWITTER_CONSUMER_SECRET") + + inactiveKey := testSecrets.MustGetField("TWITTER_CONSUMER_KEY_INACTIVE") + inactiveSecret := testSecrets.MustGetField("TWITTER_CONSUMER_SECRET_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + wantVerificationErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twitter key %s and secret %s within", key, secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TwitterConsumerkey, + Verified: true, + Raw: []byte(key), + RawV2: []byte(key + secret), + }, + }, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a twitter key %s and secret %s within", inactiveKey, inactiveSecret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_TwitterConsumerkey, + Verified: false, + Raw: []byte(inactiveKey), + RawV2: []byte(inactiveKey + inactiveSecret), + }, + }, + wantErr: false, + wantVerificationErr: true, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the key & secret within"), + verify: true, + }, + want: nil, + wantErr: false, + wantVerificationErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + + if (err != nil) != tt.wantErr { + t.Errorf("TwitterConsumerKey.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + if len(got[i].RawV2) == 0 { + t.Fatalf("no rawV2 secret present: \n %+v", got[i]) + } + if (got[i].VerificationError() != nil) != tt.wantVerificationErr { + t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError()) + } + } + ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "verificationError") + if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { + t.Errorf("TwitterConsumerKey.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/twitterconsumerkey/twitterconsumerkey_test.go b/pkg/detectors/twitterconsumerkey/twitterconsumerkey_test.go index 8ba404b35062..9d9bd49849b4 100644 --- a/pkg/detectors/twitterconsumerkey/twitterconsumerkey_test.go +++ b/pkg/detectors/twitterconsumerkey/twitterconsumerkey_test.go @@ -1,138 +1,82 @@ -//go:build detectors -// +build detectors - package twitterconsumerkey import ( "context" "fmt" "testing" - "time" "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTwitterConsumerKey_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - key := testSecrets.MustGetField("TWITTER_CONSUMER_KEY") - secret := testSecrets.MustGetField("TWITTER_CONSUMER_SECRET") - - inactiveKey := testSecrets.MustGetField("TWITTER_CONSUMER_KEY_INACTIVE") - inactiveSecret := testSecrets.MustGetField("TWITTER_CONSUMER_SECRET_INACTIVE") +var ( + validKey = "xdIAmQrrNE8knt6fwO6BQ0MMA" + invalidKey = "xdIAmQrrNE8k?t6fwO6BQ0MMA" + validSecret = "6SIZsEihsjUTkGlUWpdPeLvmplgEYaI3FvKJ5o0cMijLlGaokd" + invalidSecret = "6SIZsEihsjUTkG?UWpdPeLvmplgEYaI3FvKJ5o0cMijLlGaokd" + keyword = "twitterconsumerkey" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTwitterConsumerKey_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool - wantVerificationErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twitter key %s and secret %s within", key, secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TwitterConsumerkey, - Verified: true, - Raw: []byte(key), - RawV2: []byte(key + secret), - }, - }, - wantErr: false, - wantVerificationErr: false, - }, - { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a twitter key %s and secret %s within", inactiveKey, inactiveSecret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_TwitterConsumerkey, - Verified: false, - Raw: []byte(inactiveKey), - RawV2: []byte(inactiveKey + inactiveSecret), - }, - }, - wantErr: false, - wantVerificationErr: true, + name: "valid pattern - with keyword twitterconsumerkey", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, validKey, keyword, validSecret), + want: []string{validKey + validSecret}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the key & secret within"), - verify: true, - }, - want: nil, - wantErr: false, - wantVerificationErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, invalidKey, keyword, invalidSecret), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("TwitterConsumerKey.FromData() error = %v, wantErr %v", err, tt.wantErr) + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - if len(got[i].RawV2) == 0 { - t.Fatalf("no rawV2 secret present: \n %+v", got[i]) + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return + } + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) } - if (got[i].VerificationError() != nil) != tt.wantVerificationErr { - t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError()) + return + } + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } - ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "verificationError") - if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { - t.Errorf("TwitterConsumerKey.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) - } + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } diff --git a/pkg/detectors/tyntec/tyntec_integration_test.go b/pkg/detectors/tyntec/tyntec_integration_test.go new file mode 100644 index 000000000000..ddb10f9f8cc1 --- /dev/null +++ b/pkg/detectors/tyntec/tyntec_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package tyntec + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTyntec_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TYNTEC") + inactiveSecret := testSecrets.MustGetField("TYNTEC_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tyntec secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tyntec, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a tyntec secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Tyntec, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Tyntec.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Tyntec.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/tyntec/tyntec_test.go b/pkg/detectors/tyntec/tyntec_test.go index ddb10f9f8cc1..152358885bf2 100644 --- a/pkg/detectors/tyntec/tyntec_test.go +++ b/pkg/detectors/tyntec/tyntec_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package tyntec import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTyntec_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TYNTEC") - inactiveSecret := testSecrets.MustGetField("TYNTEC_INACTIVE") +var ( + validPattern = "3AZo8XEgSEofwdIlSjdlvPfuzTpPIarb" + invalidPattern = "3AZo8XEg?EofwdIlSjdlvPfuzTpPIarb" + keyword = "tyntec" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTyntec_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tyntec secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tyntec, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword tyntec", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a tyntec secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Tyntec, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Tyntec.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Tyntec.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/typeform/v1/typeform_integration_test.go b/pkg/detectors/typeform/v1/typeform_integration_test.go new file mode 100644 index 000000000000..ac5971a37458 --- /dev/null +++ b/pkg/detectors/typeform/v1/typeform_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package typeform + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTypeform_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TYPEFORM") + inactiveSecret := testSecrets.MustGetField("TYPEFORM_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a typeform secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Typeform, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a typeform secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Typeform, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Typeform.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Typeform.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/typeform/v1/typeform_test.go b/pkg/detectors/typeform/v1/typeform_test.go index ac5971a37458..c22c753ff032 100644 --- a/pkg/detectors/typeform/v1/typeform_test.go +++ b/pkg/detectors/typeform/v1/typeform_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package typeform import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTypeform_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TYPEFORM") - inactiveSecret := testSecrets.MustGetField("TYPEFORM_INACTIVE") +var ( + validPattern = "tfpsOcxPYaEVD4SNQ5f7HptyomeOpE1SaEuEbkusbBLR" + invalidPattern = "tf?sOcxPYaEVD4SNQ5f7HptyomeOpE1SaEuEbkusbBLR" + keyword = "typeform" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTypeform_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a typeform secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Typeform, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword typeform", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a typeform secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Typeform, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Typeform.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Typeform.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/typetalk/typetalk_integration_test.go b/pkg/detectors/typetalk/typetalk_integration_test.go new file mode 100644 index 000000000000..f99d6484c56f --- /dev/null +++ b/pkg/detectors/typetalk/typetalk_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package typetalk + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestTypetalk_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("TYPETALK") + id := testSecrets.MustGetField("TYPETALK_ID") + inactiveSecret := testSecrets.MustGetField("TYPETALK_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a typetalk secret %s within typetalk %s", secret, id)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Typetalk, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a typetalk secret %s within typetalk %s but not valid", inactiveSecret, id)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Typetalk, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Typetalk.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Typetalk.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/typetalk/typetalk_test.go b/pkg/detectors/typetalk/typetalk_test.go index f99d6484c56f..ec6b3baeeaf9 100644 --- a/pkg/detectors/typetalk/typetalk_test.go +++ b/pkg/detectors/typetalk/typetalk_test.go @@ -1,121 +1,83 @@ -//go:build detectors -// +build detectors - package typetalk import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestTypetalk_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("TYPETALK") - id := testSecrets.MustGetField("TYPETALK_ID") - inactiveSecret := testSecrets.MustGetField("TYPETALK_INACTIVE") +var ( + validKey = "MHxch0NrViQ41psiK4ypAwrUxzbKC3YeEuL1CPRedPiVouXflbdW4nKcaJ8Iidua" + invalidKey = "MHxch0NrViQ41psiK4ypAwrUxzbKC3Ye?uL1CPRedPiVouXflbdW4nKcaJ8Iidua" + validId = "ScH0wbXrV7gBPgTZgjpNy2mtcHbelKGh" + invalidId = "ScH0wbXrV7?BPgTZgjpNy2mtcHbelKGh" + keyword = "typetalk" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestTypetalk_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a typetalk secret %s within typetalk %s", secret, id)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Typetalk, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword typetalk", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, validKey, keyword, validId), + want: []string{validKey}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a typetalk secret %s within typetalk %s but not valid", inactiveSecret, id)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Typetalk, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, invalidKey, keyword, invalidId), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Typetalk.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Typetalk.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/ubidots/ubidots_integration_test.go b/pkg/detectors/ubidots/ubidots_integration_test.go new file mode 100644 index 000000000000..4dd26f2fc9d2 --- /dev/null +++ b/pkg/detectors/ubidots/ubidots_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package ubidots + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUbidots_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("UBIDOTS_TOKEN") + inactiveSecret := testSecrets.MustGetField("UBIDOTS_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a ubidots secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Ubidots, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a ubidots secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Ubidots, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Ubidots.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Ubidots.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/ubidots/ubidots_test.go b/pkg/detectors/ubidots/ubidots_test.go index 4dd26f2fc9d2..7b7f102057a0 100644 --- a/pkg/detectors/ubidots/ubidots_test.go +++ b/pkg/detectors/ubidots/ubidots_test.go @@ -1,120 +1,81 @@ -//go:build detectors -// +build detectors - package ubidots import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUbidots_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("UBIDOTS_TOKEN") - inactiveSecret := testSecrets.MustGetField("UBIDOTS_INACTIVE") +var ( + validPattern = "BBFF-ZgyYQIXWnkYcnga3ShOXhm4DlMJpSE" + invalidPattern = "BBFF-ZgyYQIXW?kYcnga3ShOXhm4DlMJpSE" + keyword = "ubidots" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUbidots_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a ubidots secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Ubidots, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword ubidots", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a ubidots secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Ubidots, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Ubidots.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Ubidots.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/uclassify/uclassify_integration_test.go b/pkg/detectors/uclassify/uclassify_integration_test.go new file mode 100644 index 000000000000..f3b4e2a4041d --- /dev/null +++ b/pkg/detectors/uclassify/uclassify_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package uclassify + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUclassify_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors4") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("UCLASSIFY") + inactiveSecret := testSecrets.MustGetField("UCLASSIFY_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a uclassify secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Uclassify, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a uclassify secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Uclassify, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Uclassify.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Uclassify.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/uclassify/uclassify_test.go b/pkg/detectors/uclassify/uclassify_test.go index f3b4e2a4041d..e5ef3bc8732e 100644 --- a/pkg/detectors/uclassify/uclassify_test.go +++ b/pkg/detectors/uclassify/uclassify_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package uclassify import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUclassify_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors4") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("UCLASSIFY") - inactiveSecret := testSecrets.MustGetField("UCLASSIFY_INACTIVE") +var ( + validPattern = "xM98lZh20vZs" + invalidPattern = "xM98lZ?20vZs" + keyword = "uclassify" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUclassify_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a uclassify secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Uclassify, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword uclassify", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a uclassify secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Uclassify, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Uclassify.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Uclassify.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/unifyid/unifyid_integration_test.go b/pkg/detectors/unifyid/unifyid_integration_test.go new file mode 100644 index 000000000000..4e59fce1e1b8 --- /dev/null +++ b/pkg/detectors/unifyid/unifyid_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package unifyid + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUnifyid_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("UNIFY") + inactiveSecret := testSecrets.MustGetField("UNIFY_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a unifyid secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_UnifyID, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a unifyid secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_UnifyID, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Unifyid.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Unifyid.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/unifyid/unifyid_test.go b/pkg/detectors/unifyid/unifyid_test.go index 4e59fce1e1b8..3d114f1217d1 100644 --- a/pkg/detectors/unifyid/unifyid_test.go +++ b/pkg/detectors/unifyid/unifyid_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package unifyid import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUnifyid_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("UNIFY") - inactiveSecret := testSecrets.MustGetField("UNIFY_INACTIVE") +var ( + validPattern = "AstGk-tlQZmhHAzRpyMO841oOydxYadtwiixD_IHuVEy" + invalidPattern = "AstGk-tlQZmhHAzRpyMO84?oOydxYadtwiixD_IHuVEy" + keyword = "unify" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUnifyid_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a unifyid secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_UnifyID, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword unify", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a unifyid secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_UnifyID, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Unifyid.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Unifyid.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/unplugg/unplugg_integration_test.go b/pkg/detectors/unplugg/unplugg_integration_test.go new file mode 100644 index 000000000000..f021bfa7c9de --- /dev/null +++ b/pkg/detectors/unplugg/unplugg_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package unplugg + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUnplugg_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("UNPLUGG") + inactiveSecret := testSecrets.MustGetField("UNPLUGG_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a unplugg secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Unplugg, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a unplugg secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Unplugg, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Unplugg.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Unplugg.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/unplugg/unplugg_test.go b/pkg/detectors/unplugg/unplugg_test.go index f021bfa7c9de..3b5e7d2e729f 100644 --- a/pkg/detectors/unplugg/unplugg_test.go +++ b/pkg/detectors/unplugg/unplugg_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package unplugg import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUnplugg_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("UNPLUGG") - inactiveSecret := testSecrets.MustGetField("UNPLUGG_INACTIVE") +var ( + validPattern = "jk0kl5thu7l6ny1zzxjbml6762ipy9wc6zlcnxyakwnisxbj5m2svwaz0wxdbgco" + invalidPattern = "jk0kl5thu7l?ny1zzxjbml6762ipy9wc6zlcnxyakwnisxbj5m2svwaz0wxdbgco" + keyword = "unplugg" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUnplugg_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a unplugg secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Unplugg, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword unplugg", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a unplugg secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Unplugg, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Unplugg.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Unplugg.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/unsplash/unsplash_integration_test.go b/pkg/detectors/unsplash/unsplash_integration_test.go new file mode 100644 index 000000000000..20941b87e0a0 --- /dev/null +++ b/pkg/detectors/unsplash/unsplash_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package unsplash + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUnsplash_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("UNSPLASH") + inactiveSecret := testSecrets.MustGetField("UNSPLASH_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a unsplash secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Unsplash, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a unsplash secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Unsplash, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Unsplash.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Unsplash.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/unsplash/unsplash_test.go b/pkg/detectors/unsplash/unsplash_test.go index 20941b87e0a0..eea0e86e0ab9 100644 --- a/pkg/detectors/unsplash/unsplash_test.go +++ b/pkg/detectors/unsplash/unsplash_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package unsplash import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUnsplash_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("UNSPLASH") - inactiveSecret := testSecrets.MustGetField("UNSPLASH_INACTIVE") +var ( + validPattern = "xpDbiR6oablsvuTJoG182EhKSesjV96AvReWRcKHZ3p" + invalidPattern = "xpDbiR6oablsvuTJoG182?hKSesjV96AvReWRcKHZ3p" + keyword = "unsplash" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUnsplash_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a unsplash secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Unsplash, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword unsplash", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a unsplash secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Unsplash, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Unsplash.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Unsplash.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/upcdatabase/upcdatabase_integration_test.go b/pkg/detectors/upcdatabase/upcdatabase_integration_test.go new file mode 100644 index 000000000000..1bf3e3a28f16 --- /dev/null +++ b/pkg/detectors/upcdatabase/upcdatabase_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package upcdatabase + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUPCDatabase_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("UPCDATABASE") + inactiveSecret := testSecrets.MustGetField("UPCDATABASE_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a upcdatabase secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_UPCDatabase, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a upcdatabase secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_UPCDatabase, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("UPCDatabase.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("UPCDatabase.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/upcdatabase/upcdatabase_test.go b/pkg/detectors/upcdatabase/upcdatabase_test.go index 1bf3e3a28f16..73b3efe767d1 100644 --- a/pkg/detectors/upcdatabase/upcdatabase_test.go +++ b/pkg/detectors/upcdatabase/upcdatabase_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package upcdatabase import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUPCDatabase_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("UPCDATABASE") - inactiveSecret := testSecrets.MustGetField("UPCDATABASE_INACTIVE") +var ( + validPattern = "CMJC3HG2BC4MSDUVGGO4ZO4VWDRHB4SO" + invalidPattern = "CMJC3H?2BC4MSDUVGGO4ZO4VWDRHB4SO" + keyword = "upcdatabase" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUPCDatabase_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a upcdatabase secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_UPCDatabase, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword upcdatabase", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a upcdatabase secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_UPCDatabase, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("UPCDatabase.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("UPCDatabase.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/uplead/uplead_integration_test.go b/pkg/detectors/uplead/uplead_integration_test.go new file mode 100644 index 000000000000..6268796550b7 --- /dev/null +++ b/pkg/detectors/uplead/uplead_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package uplead + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUplead_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("UPLEAD") + inactiveSecret := testSecrets.MustGetField("UPLEAD_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a uplead secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Uplead, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a uplead secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Uplead, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Uplead.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatal("no raw secret present") + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Uplead.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/uplead/uplead_test.go b/pkg/detectors/uplead/uplead_test.go index 6268796550b7..7d4b0bef47b8 100644 --- a/pkg/detectors/uplead/uplead_test.go +++ b/pkg/detectors/uplead/uplead_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package uplead import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUplead_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("UPLEAD") - inactiveSecret := testSecrets.MustGetField("UPLEAD_INACTIVE") +var ( + validPattern = "b1xnxn56l6jzvq3j9m2o0oio-aga9bzq" + invalidPattern = "b1xnxn56l6jzvq3j?m2o0oio-aga9bzq" + keyword = "uplead" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUplead_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a uplead secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Uplead, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword uplead", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a uplead secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Uplead, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Uplead.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatal("no raw secret present") - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Uplead.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/uploadcare/uploadcare_integration_test.go b/pkg/detectors/uploadcare/uploadcare_integration_test.go new file mode 100644 index 000000000000..25e46a061216 --- /dev/null +++ b/pkg/detectors/uploadcare/uploadcare_integration_test.go @@ -0,0 +1,118 @@ +//go:build detectors +// +build detectors + +package uploadcare + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUploadCare_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("UPLOADCARE") + publicKey := testSecrets.MustGetField("UPLOADCARE_PUBLIC") + inactiveSecret := testSecrets.MustGetField("UPLOADCARE_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a uploadcare secret %s within uploadcare %s", secret, publicKey)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_UploadCare, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a uploadcare secret %s within uploadcare %s but not valid", inactiveSecret, publicKey)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_UploadCare, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("UploadCare.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("UploadCare.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + s.FromData(ctx, false, data) + } + }) + } +} diff --git a/pkg/detectors/uploadcare/uploadcare_test.go b/pkg/detectors/uploadcare/uploadcare_test.go index 25e46a061216..0ab91249016a 100644 --- a/pkg/detectors/uploadcare/uploadcare_test.go +++ b/pkg/detectors/uploadcare/uploadcare_test.go @@ -1,117 +1,82 @@ -//go:build detectors -// +build detectors - package uploadcare import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUploadCare_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("UPLOADCARE") - publicKey := testSecrets.MustGetField("UPLOADCARE_PUBLIC") - inactiveSecret := testSecrets.MustGetField("UPLOADCARE_INACTIVE") +var ( + validKey = "ktoicf994nsq4ht02f1k" + invalidKey = "ktoicf994n?q4ht02f1k" + validPublicKey = "vq2fk1qxwn4bker4ukk1" + invalidPublicKey = "vq2fk1qxwn?bker4ukk1" + keyword = "uploadcare" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUploadCare_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a uploadcare secret %s within uploadcare %s", secret, publicKey)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_UploadCare, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword uploadcare", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, validKey, keyword, validPublicKey), + want: []string{validKey, validPublicKey, validPublicKey, validKey}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a uploadcare secret %s within uploadcare %s but not valid", inactiveSecret, publicKey)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_UploadCare, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s token - '%s'\n%s token - '%s'\n", keyword, invalidKey, keyword, invalidPublicKey), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("UploadCare.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) } - got[i].Raw = nil + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("UploadCare.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} + } + } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - s.FromData(ctx, false, data) + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } diff --git a/pkg/detectors/uptimerobot/uptimerobot_integration_test.go b/pkg/detectors/uptimerobot/uptimerobot_integration_test.go new file mode 100644 index 000000000000..7e6637138151 --- /dev/null +++ b/pkg/detectors/uptimerobot/uptimerobot_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package uptimerobot + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUptimeRobot_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("UPTIMEROBOT") + inactiveSecret := testSecrets.MustGetField("UPTIMEROBOT_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a uptimerobot secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_UptimeRobot, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a uptimerobot secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_UptimeRobot, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("UptimeRobot.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("UptimeRobot.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/uptimerobot/uptimerobot_test.go b/pkg/detectors/uptimerobot/uptimerobot_test.go index 7e6637138151..1b1ae7226f88 100644 --- a/pkg/detectors/uptimerobot/uptimerobot_test.go +++ b/pkg/detectors/uptimerobot/uptimerobot_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package uptimerobot import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUptimeRobot_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("UPTIMEROBOT") - inactiveSecret := testSecrets.MustGetField("UPTIMEROBOT_INACTIVE") +var ( + validPattern = "S0CUf2rZs-SyEiUoQb9g2p6qdwcypE8jjY" + invalidPattern = "S0CUf2rZs-SyEiUoQ?9g2p6qdwcypE8jjY" + keyword = "uptimerobot" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUptimeRobot_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a uptimerobot secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_UptimeRobot, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword uptimerobot", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a uptimerobot secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_UptimeRobot, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("UptimeRobot.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("UptimeRobot.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/upwave/upwave_integration_test.go b/pkg/detectors/upwave/upwave_integration_test.go new file mode 100644 index 000000000000..15371ebe1789 --- /dev/null +++ b/pkg/detectors/upwave/upwave_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package upwave + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUpwave_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("UPWAVE") + inactiveSecret := testSecrets.MustGetField("UPWAVE_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a upwave secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Upwave, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a upwave secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Upwave, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Upwave.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Upwave.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/upwave/upwave_test.go b/pkg/detectors/upwave/upwave_test.go index 15371ebe1789..6f6ed04a0824 100644 --- a/pkg/detectors/upwave/upwave_test.go +++ b/pkg/detectors/upwave/upwave_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package upwave import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUpwave_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("UPWAVE") - inactiveSecret := testSecrets.MustGetField("UPWAVE_INACTIVE") +var ( + validPattern = "mgepiy2lx8tttu5h45efviri2ngz83y0" + invalidPattern = "mgepiy2lx8tttu5h?5efviri2ngz83y0" + keyword = "upwave" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUpwave_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a upwave secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Upwave, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword upwave", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a upwave secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Upwave, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Upwave.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Upwave.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/uri/uri_integration_test.go b/pkg/detectors/uri/uri_integration_test.go new file mode 100644 index 000000000000..567c97979b5d --- /dev/null +++ b/pkg/detectors/uri/uri_integration_test.go @@ -0,0 +1,165 @@ +//go:build detectors +// +build detectors + +package uri + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestURI_FromChunk(t *testing.T) { + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + wantVerificationErr bool + }{ + { + name: "found, unverified, wrong username", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a uri secret %s within", "https://user:pass@httpwatch.com/httpgallery/authentication/authenticatedimage/default.aspx")), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_URI, + Verified: false, + Redacted: "https://user:********@httpwatch.com", + }, + }, + wantErr: false, + }, + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a uri secret %s within", "https://httpwatch:pass@www.httpwatch.com/httpgallery/authentication/authenticatedimage/default.aspx")), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_URI, + Verified: true, + Redacted: "https://httpwatch:********@www.httpwatch.com", + }, + }, + wantErr: false, + }, + { + name: "found, verified, defused", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a uri secret %s within", "https://httpwatch:pass@www.httpwatch.com/httpgallery/authentication/authenticatedimage/default.aspx?foo=bar")), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_URI, + Verified: true, + Redacted: "https://httpwatch:********@www.httpwatch.com", + }, + }, + wantErr: false, + }, + { + name: "found, verified if not for timeout", + s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a uri secret %s within", "https://httpwatch:pass@www.httpwatch.com/httpgallery/authentication/authenticatedimage/default.aspx")), + verify: true, + }, + want: func() []detectors.Result { + r := detectors.Result{ + DetectorType: detectorspb.DetectorType_URI, + Verified: false, + Redacted: "https://httpwatch:********@www.httpwatch.com", + } + r.SetVerificationError(fmt.Errorf("context deadline exceeded")) + return []detectors.Result{r} + }(), + wantErr: false, + wantVerificationErr: true, + }, + { + name: "bad scheme", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("file://user:pass@foo.com:123/wh/at/ever"), + verify: true, + }, + wantErr: false, + }, + { + name: "nothing found, password was redacted two different ways", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("Both %s and %s have been redacted within", "https://httpwatch::********@www.httpwatch.com/httpgallery/authentication/authenticatedimage/default.aspx?foo=bar", "https://httpwatch::%2A%2A%2A%2A%2A%2A@www.httpwatch.com/httpgallery/authentication/authenticatedimage/default.aspx?foo=bar")), + verify: true, + }, + want: []detectors.Result{}, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.s.allowKnownTestSites = true + got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("URI.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + // if os.Getenv("FORCE_PASS_DIFF") == "true" { + // return + // } + for i := range got { + if (got[i].VerificationError() != nil) != tt.wantVerificationErr { + t.Errorf("URI.FromData() error = %v, wantVerificationErr %v", got[i].VerificationError(), tt.want[i]) + return + } + got[i].Raw = nil + got[i].RawV2 = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("URI.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/uri/uri_test.go b/pkg/detectors/uri/uri_test.go index 567c97979b5d..7c0762bb0434 100644 --- a/pkg/detectors/uri/uri_test.go +++ b/pkg/detectors/uri/uri_test.go @@ -1,165 +1,81 @@ -//go:build detectors -// +build detectors - package uri import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/google/go-cmp/cmp" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestURI_FromChunk(t *testing.T) { - type args struct { - ctx context.Context - data []byte - verify bool - } +var ( + validPattern = "https://kaNydBSAodo87dsm9asuiSAFtsd7.com:1234@qYY3SylY7fHP" + invalidPattern = "https://kaNydBSAodo87dsm9asuiSAFtsd7.com.1234@qYY3SylY7fHP" + keyword = "uri" +) + +func TestURI_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool - wantVerificationErr bool + name string + input string + want []string }{ { - name: "found, unverified, wrong username", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a uri secret %s within", "https://user:pass@httpwatch.com/httpgallery/authentication/authenticatedimage/default.aspx")), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_URI, - Verified: false, - Redacted: "https://user:********@httpwatch.com", - }, - }, - wantErr: false, - }, - { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a uri secret %s within", "https://httpwatch:pass@www.httpwatch.com/httpgallery/authentication/authenticatedimage/default.aspx")), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_URI, - Verified: true, - Redacted: "https://httpwatch:********@www.httpwatch.com", - }, - }, - wantErr: false, - }, - { - name: "found, verified, defused", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a uri secret %s within", "https://httpwatch:pass@www.httpwatch.com/httpgallery/authentication/authenticatedimage/default.aspx?foo=bar")), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_URI, - Verified: true, - Redacted: "https://httpwatch:********@www.httpwatch.com", - }, - }, - wantErr: false, - }, - { - name: "found, verified if not for timeout", - s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a uri secret %s within", "https://httpwatch:pass@www.httpwatch.com/httpgallery/authentication/authenticatedimage/default.aspx")), - verify: true, - }, - want: func() []detectors.Result { - r := detectors.Result{ - DetectorType: detectorspb.DetectorType_URI, - Verified: false, - Redacted: "https://httpwatch:********@www.httpwatch.com", - } - r.SetVerificationError(fmt.Errorf("context deadline exceeded")) - return []detectors.Result{r} - }(), - wantErr: false, - wantVerificationErr: true, - }, - { - name: "bad scheme", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("file://user:pass@foo.com:123/wh/at/ever"), - verify: true, - }, - wantErr: false, + name: "valid pattern - with keyword uri", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "nothing found, password was redacted two different ways", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("Both %s and %s have been redacted within", "https://httpwatch::********@www.httpwatch.com/httpgallery/authentication/authenticatedimage/default.aspx?foo=bar", "https://httpwatch::%2A%2A%2A%2A%2A%2A@www.httpwatch.com/httpgallery/authentication/authenticatedimage/default.aspx?foo=bar")), - verify: true, - }, - want: []detectors.Result{}, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - tt.s.allowKnownTestSites = true - got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("URI.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - // if os.Getenv("FORCE_PASS_DIFF") == "true" { - // return - // } - for i := range got { - if (got[i].VerificationError() != nil) != tt.wantVerificationErr { - t.Errorf("URI.FromData() error = %v, wantVerificationErr %v", got[i].VerificationError(), tt.want[i]) - return - } - got[i].Raw = nil - got[i].RawV2 = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("URI.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/urlscan/urlscan_integration_test.go b/pkg/detectors/urlscan/urlscan_integration_test.go new file mode 100644 index 000000000000..7008388c7fd1 --- /dev/null +++ b/pkg/detectors/urlscan/urlscan_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package urlscan + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUrlscan_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("URLSCAN") + inactiveSecret := testSecrets.MustGetField("URLSCAN_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a urlscan secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Urlscan, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a urlscan secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Urlscan, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Urlscan.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Urlscan.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/urlscan/urlscan_test.go b/pkg/detectors/urlscan/urlscan_test.go index 7008388c7fd1..d5d22eec7924 100644 --- a/pkg/detectors/urlscan/urlscan_test.go +++ b/pkg/detectors/urlscan/urlscan_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package urlscan import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUrlscan_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("URLSCAN") - inactiveSecret := testSecrets.MustGetField("URLSCAN_INACTIVE") +var ( + validPattern = "0l8zbe7ucnhdermmblvndjb2p22abcs7y7j8" + invalidPattern = "0l8zbe7ucnhdermmbl?ndjb2p22abcs7y7j8" + keyword = "urlscan" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUrlscan_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a urlscan secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Urlscan, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword urlscan", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a urlscan secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Urlscan, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Urlscan.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Urlscan.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/user/user_integration_test.go b/pkg/detectors/user/user_integration_test.go new file mode 100644 index 000000000000..970e0877e235 --- /dev/null +++ b/pkg/detectors/user/user_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package user + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUser_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("USER") + inactiveSecret := testSecrets.MustGetField("USER_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a user secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_User, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a user secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_User, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("User.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("User.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/user/user_test.go b/pkg/detectors/user/user_test.go index 970e0877e235..1847ee565f75 100644 --- a/pkg/detectors/user/user_test.go +++ b/pkg/detectors/user/user_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package user import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUser_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("USER") - inactiveSecret := testSecrets.MustGetField("USER_INACTIVE") +var ( + validPattern = "OmFxWjhZvCpOeMsgTJdZMas+dlUpr=fa7+.QOKKYvi7RKWyBeHtaLa7_rzMhLrRd" + invalidPattern = "OmFxWjhZvCpOeMsgTJdZMas+dlUpr=fa?+.QOKKYvi7RKWyBeHtaLa7_rzMhLrRd" + keyword = "user" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUser_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a user secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_User, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword user", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a user secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_User, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("User.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("User.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/userflow/userflow_integration_test.go b/pkg/detectors/userflow/userflow_integration_test.go new file mode 100644 index 000000000000..6321e454bf58 --- /dev/null +++ b/pkg/detectors/userflow/userflow_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package userflow + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUserflow_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("USERFLOW") + inactiveSecret := testSecrets.MustGetField("USERFLOW_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a userflow secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Userflow, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a userflow secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Userflow, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Userflow.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Userflow.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/userflow/userflow_test.go b/pkg/detectors/userflow/userflow_test.go index 6321e454bf58..4764804e3ee0 100644 --- a/pkg/detectors/userflow/userflow_test.go +++ b/pkg/detectors/userflow/userflow_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package userflow import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUserflow_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("USERFLOW") - inactiveSecret := testSecrets.MustGetField("USERFLOW_INACTIVE") +var ( + validPattern = "503ulfqkdfay0penkz0zkq1glayt6" + invalidPattern = "503ulfqkdfay0p?nkz0zkq1glayt6" + keyword = "userflow" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUserflow_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a userflow secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Userflow, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword userflow", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a userflow secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Userflow, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Userflow.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Userflow.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/userstack/userstack_integration_test.go b/pkg/detectors/userstack/userstack_integration_test.go new file mode 100644 index 000000000000..d436c0ddf949 --- /dev/null +++ b/pkg/detectors/userstack/userstack_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package userstack + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestUserStack_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("USERSTACK") + inactiveSecret := testSecrets.MustGetField("USERSTACK_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a userstack secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_UserStack, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a userstack secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_UserStack, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("UserStack.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("UserStack.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/userstack/userstack_test.go b/pkg/detectors/userstack/userstack_test.go index d436c0ddf949..80b6383b5f60 100644 --- a/pkg/detectors/userstack/userstack_test.go +++ b/pkg/detectors/userstack/userstack_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package userstack import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestUserStack_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("USERSTACK") - inactiveSecret := testSecrets.MustGetField("USERSTACK_INACTIVE") +var ( + validPattern = "fwtfmlat0nmosek0lgvl9o8y0o1xvazt" + invalidPattern = "fwtfm?at0nmosek0lgvl9o8y0o1xvazt" + keyword = "userstack" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestUserStack_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a userstack secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_UserStack, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword userstack", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a userstack secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_UserStack, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("UserStack.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("UserStack.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/vagrantcloudpersonaltoken/vagrantcloudpersonaltoken_integration_test.go b/pkg/detectors/vagrantcloudpersonaltoken/vagrantcloudpersonaltoken_integration_test.go new file mode 100644 index 000000000000..6ecbafaebea4 --- /dev/null +++ b/pkg/detectors/vagrantcloudpersonaltoken/vagrantcloudpersonaltoken_integration_test.go @@ -0,0 +1,128 @@ +//go:build detectors +// +build detectors + +package vagrantcloudpersonaltoken + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVagrantcloudpersonaltoken_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VAGRANTCLOUDPERSONALTOKEN") + inactiveSecret := testSecrets.MustGetField("VAGRANTCLOUDPERSONALTOKEN_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + wantVerificationErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vagrantcloudpersonaltoken secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VagrantCloudPersonalToken, + Verified: true, + }, + }, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vagrantcloudpersonaltoken secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VagrantCloudPersonalToken, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + wantVerificationErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Vagrantcloudpersonaltoken.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + if (got[i].VerificationError() != nil) != tt.wantVerificationErr { + t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError()) + } + } + ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError") + if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { + t.Errorf("Vagrantcloudpersonaltoken.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/vagrantcloudpersonaltoken/vagrantcloudpersonaltoken_test.go b/pkg/detectors/vagrantcloudpersonaltoken/vagrantcloudpersonaltoken_test.go index 6ecbafaebea4..9136f1c3cb52 100644 --- a/pkg/detectors/vagrantcloudpersonaltoken/vagrantcloudpersonaltoken_test.go +++ b/pkg/detectors/vagrantcloudpersonaltoken/vagrantcloudpersonaltoken_test.go @@ -1,127 +1,90 @@ -//go:build detectors -// +build detectors - package vagrantcloudpersonaltoken import ( "context" "fmt" "testing" - "time" "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVagrantcloudpersonaltoken_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VAGRANTCLOUDPERSONALTOKEN") - inactiveSecret := testSecrets.MustGetField("VAGRANTCLOUDPERSONALTOKEN_INACTIVE") +var ( + validPattern = "3xPXvIQhdxEXQGwatlasv1x1zK093TwRXADtoR720b1kCixmaWWgo8Ivyir5FsMFVUoCQwWYtGXxLChctCi8CRoSk2" + invalidPattern = "3xPXvIQhdxEXQGwatlasv1x1zK093TwRXADtoR720b1kC?xmaWWgo8Ivyir5FsMFVUoCQwWYtGXxLChctCi8CRoSk2" + keyword = "vagrantcloudpersonaltoken" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVagrantcloudpersonaltoken_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool - wantVerificationErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vagrantcloudpersonaltoken secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VagrantCloudPersonalToken, - Verified: true, - }, - }, - wantErr: false, - wantVerificationErr: false, + name: "valid pattern - with keyword vagrantcloudpersonaltoken", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vagrantcloudpersonaltoken secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VagrantCloudPersonalToken, - Verified: false, - }, - }, - wantErr: false, - wantVerificationErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, - wantVerificationErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Vagrantcloudpersonaltoken.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return + } + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) } - if (got[i].VerificationError() != nil) != tt.wantVerificationErr { - t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError()) + return + } + + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } - ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError") - if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { - t.Errorf("Vagrantcloudpersonaltoken.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) - } + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) } }) } diff --git a/pkg/detectors/vatlayer/vatlayer_integration_test.go b/pkg/detectors/vatlayer/vatlayer_integration_test.go new file mode 100644 index 000000000000..57ac61124fe3 --- /dev/null +++ b/pkg/detectors/vatlayer/vatlayer_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package vatlayer + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVatLayer_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VATLAYER") + inactiveSecret := testSecrets.MustGetField("VATLAYER_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vatlayer secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VatLayer, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vatlayer secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VatLayer, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("VatLayer.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("VatLayer.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/vatlayer/vatlayer_test.go b/pkg/detectors/vatlayer/vatlayer_test.go index 57ac61124fe3..272a355d100e 100644 --- a/pkg/detectors/vatlayer/vatlayer_test.go +++ b/pkg/detectors/vatlayer/vatlayer_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package vatlayer import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVatLayer_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VATLAYER") - inactiveSecret := testSecrets.MustGetField("VATLAYER_INACTIVE") +var ( + validPattern = "uj05sglnibecnutz4ky665kpjcqgyara" + invalidPattern = "uj05sglnibecnutz?ky665kpjcqgyara" + keyword = "vatlayer" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVatLayer_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vatlayer secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VatLayer, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword vatlayer", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vatlayer secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VatLayer, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("VatLayer.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("VatLayer.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/vbout/vbout_integration_test.go b/pkg/detectors/vbout/vbout_integration_test.go new file mode 100644 index 000000000000..c8eaa6575b46 --- /dev/null +++ b/pkg/detectors/vbout/vbout_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package vbout + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVbout_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VBOUT") + inactiveSecret := testSecrets.MustGetField("VBOUT_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vbout secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Vbout, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vbout secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Vbout, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Vbout.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Vbout.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/vbout/vbout_test.go b/pkg/detectors/vbout/vbout_test.go index c8eaa6575b46..2382101d3fb1 100644 --- a/pkg/detectors/vbout/vbout_test.go +++ b/pkg/detectors/vbout/vbout_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package vbout import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVbout_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VBOUT") - inactiveSecret := testSecrets.MustGetField("VBOUT_INACTIVE") +var ( + validPattern = "9734741337193905126710251" + invalidPattern = "97?4741337193905126710251" + keyword = "vbout" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVbout_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vbout secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Vbout, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword vbout", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vbout secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Vbout, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Vbout.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Vbout.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/vercel/vercel_integration_test.go b/pkg/detectors/vercel/vercel_integration_test.go new file mode 100644 index 000000000000..d9726d83fecf --- /dev/null +++ b/pkg/detectors/vercel/vercel_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package vercel + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVercel_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VERCEL_TOKEN") + inactiveSecret := testSecrets.MustGetField("VERCEL_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vercel secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Vercel, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vercel secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Vercel, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Vercel.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Vercel.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/vercel/vercel_test.go b/pkg/detectors/vercel/vercel_test.go index d9726d83fecf..47d7797b404d 100644 --- a/pkg/detectors/vercel/vercel_test.go +++ b/pkg/detectors/vercel/vercel_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package vercel import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVercel_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VERCEL_TOKEN") - inactiveSecret := testSecrets.MustGetField("VERCEL_INACTIVE") +var ( + validPattern = "gGP4GJRR3TOpU7wzTY9UiSTQ" + invalidPattern = "gGP4GJRR3TOp?7wzTY9UiSTQ" + keyword = "vercel" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVercel_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vercel secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Vercel, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword vercel", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vercel secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Vercel, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Vercel.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Vercel.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/verimail/verimail_integration_test.go b/pkg/detectors/verimail/verimail_integration_test.go new file mode 100644 index 000000000000..87165a4c00f4 --- /dev/null +++ b/pkg/detectors/verimail/verimail_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package verimail + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVerimail_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VERIMAIL_TOKEN") + inactiveSecret := testSecrets.MustGetField("VERIMAIL_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a verimail secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Verimail, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a verimail secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Verimail, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Verimail.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Verimail.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/verimail/verimail_test.go b/pkg/detectors/verimail/verimail_test.go index 87165a4c00f4..ef019dde7192 100644 --- a/pkg/detectors/verimail/verimail_test.go +++ b/pkg/detectors/verimail/verimail_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package verimail import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVerimail_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VERIMAIL_TOKEN") - inactiveSecret := testSecrets.MustGetField("VERIMAIL_INACTIVE") +var ( + validPattern = "G0PW4TG34CP100EJD73BGE79Y2WK1CTJ" + invalidPattern = "G0PW4TG34CP100EJ?73BGE79Y2WK1CTJ" + keyword = "verimail" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVerimail_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a verimail secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Verimail, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword verimail", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a verimail secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Verimail, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Verimail.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Verimail.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/veriphone/veriphone_integration_test.go b/pkg/detectors/veriphone/veriphone_integration_test.go new file mode 100644 index 000000000000..af712d5db67d --- /dev/null +++ b/pkg/detectors/veriphone/veriphone_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package veriphone + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVeriphone_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VERIPHONE") + inactiveSecret := testSecrets.MustGetField("VERIPHONE_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a veriphone secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Veriphone, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a veriphone secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Veriphone, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Veriphone.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Veriphone.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/veriphone/veriphone_test.go b/pkg/detectors/veriphone/veriphone_test.go index af712d5db67d..fa62c8c7a5d5 100644 --- a/pkg/detectors/veriphone/veriphone_test.go +++ b/pkg/detectors/veriphone/veriphone_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package veriphone import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVeriphone_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VERIPHONE") - inactiveSecret := testSecrets.MustGetField("VERIPHONE_INACTIVE") +var ( + validPattern = "T7OWX9ATDTVE1ITA1HON9MAASLBDWNZA" + invalidPattern = "T7OWX9ATDTVE?ITA1HON9MAASLBDWNZA" + keyword = "veriphone" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVeriphone_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a veriphone secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Veriphone, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword veriphone", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a veriphone secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Veriphone, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Veriphone.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Veriphone.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/versioneye/versioneye_integration_test.go b/pkg/detectors/versioneye/versioneye_integration_test.go new file mode 100644 index 000000000000..336405d4206e --- /dev/null +++ b/pkg/detectors/versioneye/versioneye_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package versioneye + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVersionEye_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VERSIONEYE") + inactiveSecret := testSecrets.MustGetField("VERSIONEYE_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a versioneye secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VersionEye, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a versioneye secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VersionEye, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("VersionEye.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("VersionEye.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/versioneye/versioneye_test.go b/pkg/detectors/versioneye/versioneye_test.go index 336405d4206e..87ddd6345367 100644 --- a/pkg/detectors/versioneye/versioneye_test.go +++ b/pkg/detectors/versioneye/versioneye_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package versioneye import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVersionEye_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VERSIONEYE") - inactiveSecret := testSecrets.MustGetField("VERSIONEYE_INACTIVE") +var ( + validPattern = "pSbrVZlhasYd50mEJEc2n7lm3UTut3ZOqBVk-uZe" + invalidPattern = "pSbrVZlhasYd50mEJEc2?7lm3UTut3ZOqBVk-uZe" + keyword = "versioneye" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVersionEye_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a versioneye secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VersionEye, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword versioneye", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a versioneye secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VersionEye, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("VersionEye.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("VersionEye.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/viewneo/viewneo_integration_test.go b/pkg/detectors/viewneo/viewneo_integration_test.go new file mode 100644 index 000000000000..ff6c82645570 --- /dev/null +++ b/pkg/detectors/viewneo/viewneo_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package viewneo + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestViewneo_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VIEWNEO") + inactiveSecret := testSecrets.MustGetField("VIEWNEO_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a viewneo secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Viewneo, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a viewneo secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Viewneo, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Viewneo.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Viewneo.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/viewneo/viewneo_test.go b/pkg/detectors/viewneo/viewneo_test.go index ff6c82645570..bc0aaacb96ed 100644 --- a/pkg/detectors/viewneo/viewneo_test.go +++ b/pkg/detectors/viewneo/viewneo_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package viewneo import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestViewneo_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VIEWNEO") - inactiveSecret := testSecrets.MustGetField("VIEWNEO_INACTIVE") +var ( + validPattern = "cZZ32f1UDFCqsSP6uUlFyPLrXnGhgunvxc3jRsTO1mygukFtXxmk3sV0Q4PiGW8TstPPagjg8o3N0Jp25RKDJUOssch6rbSBVPc3R5vsRSa7y00CKKuu6T6N.2oV6hZlvOR7Jm3L6PzRLMbPRburA2FUfRlRLktcCbt2nBX1iyAfdMv8JvCjAhUJHT52PhiAT3ca7FNd5q5ZXAkn87LnuQhc5UHyuwD8gcWstOghUHZ20tcz7SjVuKyWZFgODlW2WczXqHKxaNhWYz4.839QXG7zlPYdYNhfbvQZe1zHr6bbbjIQYUs2q5whTtUWm8tCMWaOtm_DEZ_xKO5RUrajoClRedRiGK0fFAMshDSyAROOa7NXcE4WM_AuURDSif51QmcWY5HRITdd7y639Zc2Sz1kkUz-Ks_Aqe7xy0VUOlA8m4w2A7IfQ2iDtUeAlWIz1vsOihDxWeNqvTj5D5JOQcyRCiCfTfDWptrJCkKsMWMcDNRE773ypzQVn3r6VSVC63UqdT5Et5jpS5C1wFMuJDei5w7t4vPBTbodepVLtEkn4HcuyTEt0m-Rh_LIxMShlL56AeC7bVBNvvRpNMi_YT3wTozsXvAXEDS1bdOcD_MLk7-g8L1FfeBZxTnRfLR81idE4qR7ecTeNgfVvuiddb-IGrIAefADZ_Vzl49E3amY7twA7EqX04lBZiVfZsO1R0BlzsCLqQ10fsleLl-S00R01G1Fn2e2gkEkRkwOfxbA7BdTYJwz3s1m7rC2HmQLyT_-h8qE30fGzWkoq7INPSTmJ0EJOPDRY3TZi7axUSDEjZbF8TwXcD3jFDmaAYD3D4E5NSKnILnacXC-kfGZQcP4bcrPbHa4BoNN3kyt" + invalidPattern = "c?Z32f1UDFCqsSP6uUlFyPLrXnGhgunvxc3jRsTO1mygukFtXxmk3sV0Q4PiGW8TstPPagjg8o3N0Jp25RKDJUOssch6rbSBVPc3R5vsRSa7y00CKKuu6T6N.2oV6hZlvOR7Jm3L6PzRLMbPRburA2FUfRlRLktcCbt2nBX1iyAfdMv8JvCjAhUJHT52PhiAT3ca7FNd5q5ZXAkn87LnuQhc5UHyuwD8gcWstOghUHZ20tcz7SjVuKyWZFgODlW2WczXqHKxaNhWYz4.839QXG7zlPYdYNhfbvQZe1zHr6bbbjIQYUs2q5whTtUWm8tCMWaOtm_DEZ_xKO5RUrajoClRedRiGK0fFAMshDSyAROOa7NXcE4WM_AuURDSif51QmcWY5HRITdd7y639Zc2Sz1kkUz-Ks_Aqe7xy0VUOlA8m4w2A7IfQ2iDtUeAlWIz1vsOihDxWeNqvTj5D5JOQcyRCiCfTfDWptrJCkKsMWMcDNRE773ypzQVn3r6VSVC63UqdT5Et5jpS5C1wFMuJDei5w7t4vPBTbodepVLtEkn4HcuyTEt0m-Rh_LIxMShlL56AeC7bVBNvvRpNMi_YT3wTozsXvAXEDS1bdOcD_MLk7-g8L1FfeBZxTnRfLR81idE4qR7ecTeNgfVvuiddb-IGrIAefADZ_Vzl49E3amY7twA7EqX04lBZiVfZsO1R0BlzsCLqQ10fsleLl-S00R01G1Fn2e2gkEkRkwOfxbA7BdTYJwz3s1m7rC2HmQLyT_-h8qE30fGzWkoq7INPSTmJ0EJOPDRY3TZi7axUSDEjZbF8TwXcD3jFDmaAYD3D4E5NSKnILnacXC-kfGZQcP4bcrPbHa4BoNN3kyt" + keyword = "viewneo" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestViewneo_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a viewneo secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Viewneo, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword viewneo", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a viewneo secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Viewneo, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Viewneo.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Viewneo.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/virustotal/virustotal_integration_test.go b/pkg/detectors/virustotal/virustotal_integration_test.go new file mode 100644 index 000000000000..b416c118effd --- /dev/null +++ b/pkg/detectors/virustotal/virustotal_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package virustotal + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVirusTotal_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VIRUSTOTAL") + inactiveSecret := testSecrets.MustGetField("VIRUSTOTAL_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a virustotal secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VirusTotal, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a virustotal secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VirusTotal, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("VirusTotal.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("VirusTotal.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/virustotal/virustotal_test.go b/pkg/detectors/virustotal/virustotal_test.go index b416c118effd..199bd4897844 100644 --- a/pkg/detectors/virustotal/virustotal_test.go +++ b/pkg/detectors/virustotal/virustotal_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package virustotal import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVirusTotal_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VIRUSTOTAL") - inactiveSecret := testSecrets.MustGetField("VIRUSTOTAL_INACTIVE") +var ( + validPattern = "67d334aba3419b4d090ebeecac11b8830cafa388f9b6e3bfa97d03f4d25dbf46" + invalidPattern = "67d334aba3419b4d?90ebeecac11b8830cafa388f9b6e3bfa97d03f4d25dbf46" + keyword = "virustotal" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVirusTotal_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a virustotal secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VirusTotal, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword virustotal", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, + }, + { + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a virustotal secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VirusTotal, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("VirusTotal.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("VirusTotal.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/visualcrossing/visualcrossing_integration_test.go b/pkg/detectors/visualcrossing/visualcrossing_integration_test.go new file mode 100644 index 000000000000..9c570ee36938 --- /dev/null +++ b/pkg/detectors/visualcrossing/visualcrossing_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package visualcrossing + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVisualcrossing_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VISUALCROSSING") + inactiveSecret := testSecrets.MustGetField("VISUALCROSSING_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a visualcrossing secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VisualCrossing, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a visualcrossing secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VisualCrossing, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Visualcrossing.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Visualcrossing.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/visualcrossing/visualcrossing_test.go b/pkg/detectors/visualcrossing/visualcrossing_test.go index 9c570ee36938..98e0a90b7945 100644 --- a/pkg/detectors/visualcrossing/visualcrossing_test.go +++ b/pkg/detectors/visualcrossing/visualcrossing_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package visualcrossing import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVisualcrossing_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VISUALCROSSING") - inactiveSecret := testSecrets.MustGetField("VISUALCROSSING_INACTIVE") +var ( + validPattern = "9D39V79EZ6V3RO0YFV3XUOGLJ" + invalidPattern = "9D39V79EZ6V3?O0YFV3XUOGLJ" + keyword = "visualcrossing" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVisualcrossing_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a visualcrossing secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VisualCrossing, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword visualcrossing", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, + }, + { + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a visualcrossing secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VisualCrossing, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Visualcrossing.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Visualcrossing.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/voicegain/voicegain_integration_test.go b/pkg/detectors/voicegain/voicegain_integration_test.go new file mode 100644 index 000000000000..4a3ce826faa5 --- /dev/null +++ b/pkg/detectors/voicegain/voicegain_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package voicegain + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVoicegain_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VOICEGAIN") + inactiveSecret := testSecrets.MustGetField("VOICEGAIN_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a voicegain secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Voicegain, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a voicegain secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Voicegain, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Voicegain.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Voicegain.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/voicegain/voicegain_test.go b/pkg/detectors/voicegain/voicegain_test.go index 4a3ce826faa5..ca914bc77c5e 100644 --- a/pkg/detectors/voicegain/voicegain_test.go +++ b/pkg/detectors/voicegain/voicegain_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package voicegain import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVoicegain_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VOICEGAIN") - inactiveSecret := testSecrets.MustGetField("VOICEGAIN_INACTIVE") +var ( + validPattern = "ey7nVWxLrVTPaolyqAYxmedMyueqpSQ6eIfX.ey582H2Ghcpty-_E0Wn4syTEdwAdXRF6eYyKgHveC26fBGQ4BiZIS30BcPrr1JG2xiFV4-bEOzVAMcWEQkh-THo0Z3RT6Os45ZGD28dlaiicZH.i4G5a88NkP82UpJoh3vW-sHOBx-uX1tYwyGcxykJO55" + invalidPattern = "ay7nVWxLrVTPaolyqAYxmedMyueqpSQ6eIfX.ey582H2Ghcpty-_E0Wn4syTEdwAdXRF6eYyKgHveC26fBGQ4BiZIS30BcPrr1JG2xiFV4-bEOzVAMcWEQkh-THo0Z3RT6Os45ZGD28dlaiicZH.i4G5a88NkP82UpJoh3vW-sHOBx-uX1tYwyGcxykJO55" + keyword = "voicegain" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVoicegain_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a voicegain secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Voicegain, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword voicegain", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a voicegain secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Voicegain, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Voicegain.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Voicegain.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/voodoosms/voodoosms_integration_test.go b/pkg/detectors/voodoosms/voodoosms_integration_test.go new file mode 100644 index 000000000000..3728dde96026 --- /dev/null +++ b/pkg/detectors/voodoosms/voodoosms_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package voodoosms + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVoodooSMS_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VOODOOSMS") + inactiveSecret := testSecrets.MustGetField("VOODOOSMS_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a voodoosms secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VoodooSMS, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a voodoosms secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VoodooSMS, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("VoodooSMS.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("VoodooSMS.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/voodoosms/voodoosms_test.go b/pkg/detectors/voodoosms/voodoosms_test.go index 3728dde96026..5711c0de6127 100644 --- a/pkg/detectors/voodoosms/voodoosms_test.go +++ b/pkg/detectors/voodoosms/voodoosms_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package voodoosms import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVoodooSMS_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VOODOOSMS") - inactiveSecret := testSecrets.MustGetField("VOODOOSMS_INACTIVE") +var ( + validPattern = "qpbVNj3Fdxc38gIixFqXAndxfSvoafOWztBMIzsl0Y262i" + invalidPattern = "qpbVNj3Fd?c38gIixFqXAndxfSvoafOWztBMIzsl0Y262i" + keyword = "voodoosms" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVoodooSMS_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a voodoosms secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VoodooSMS, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword voodoosms", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a voodoosms secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VoodooSMS, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("VoodooSMS.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("VoodooSMS.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/vouchery/vouchery_integration_test.go b/pkg/detectors/vouchery/vouchery_integration_test.go new file mode 100644 index 000000000000..bf0f879703dd --- /dev/null +++ b/pkg/detectors/vouchery/vouchery_integration_test.go @@ -0,0 +1,121 @@ +//go:build detectors +// +build detectors + +package vouchery + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVouchery_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VOUCHERY") + subdomain := testSecrets.MustGetField("VOUCHERY_SUBDOMAIN") + inactiveSecret := testSecrets.MustGetField("VOUCHERY_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vouchery secret %s within vouchery %s", secret, subdomain)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Vouchery, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vouchery secret %s within vouchery %s but not valid", inactiveSecret, subdomain)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Vouchery, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Vouchery.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Vouchery.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/vouchery/vouchery_test.go b/pkg/detectors/vouchery/vouchery_test.go index bf0f879703dd..81bfcb5d82e8 100644 --- a/pkg/detectors/vouchery/vouchery_test.go +++ b/pkg/detectors/vouchery/vouchery_test.go @@ -1,121 +1,83 @@ -//go:build detectors -// +build detectors - package vouchery import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVouchery_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VOUCHERY") - subdomain := testSecrets.MustGetField("VOUCHERY_SUBDOMAIN") - inactiveSecret := testSecrets.MustGetField("VOUCHERY_INACTIVE") +var ( + validKey = "7ts1czbnd621chpqufnon62o32w0z2iuf15x" + invalidKey = "7ts1czb?d621chpqufnon62o32w0z2iuf15x" + validSub = "u;Yb0#E$0" + invalidSub = "u;Yb?#E$0" + keyword = "vouchery" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVouchery_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vouchery secret %s within vouchery %s", secret, subdomain)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Vouchery, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword vouchery", + input: fmt.Sprintf("%s '%s'\n\n%s '%s'\n", keyword, validKey, keyword, validSub), + want: []string{validKey + validSub}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vouchery secret %s within vouchery %s but not valid", inactiveSecret, subdomain)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Vouchery, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "invalid pattern", + input: fmt.Sprintf("%s '%s'\n\n%s '%s'\n", keyword, invalidKey, keyword, invalidSub), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Vouchery.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Vouchery.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/vpnapi/vpnapi_integration_test.go b/pkg/detectors/vpnapi/vpnapi_integration_test.go new file mode 100644 index 000000000000..34583d05b839 --- /dev/null +++ b/pkg/detectors/vpnapi/vpnapi_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package vpnapi + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVpnapi_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VPNAPI") + inactiveSecret := testSecrets.MustGetField("VPNAPI_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vpnapi secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Vpnapi, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vpnapi secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Vpnapi, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Vpnapi.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Vpnapi.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/vpnapi/vpnapi_test.go b/pkg/detectors/vpnapi/vpnapi_test.go index 34583d05b839..438806f947a7 100644 --- a/pkg/detectors/vpnapi/vpnapi_test.go +++ b/pkg/detectors/vpnapi/vpnapi_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package vpnapi import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVpnapi_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VPNAPI") - inactiveSecret := testSecrets.MustGetField("VPNAPI_INACTIVE") +var ( + validPattern = "LAqmsNMbdWkEmZW053axrvWTUKVRbcKN" + invalidPattern = "LAqmsNMbdWkEmZW0?3axrvWTUKVRbcKN" + keyword = "vpnapi" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVpnapi_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vpnapi secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Vpnapi, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword vpnapi", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vpnapi secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Vpnapi, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Vpnapi.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Vpnapi.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/vultrapikey/vultrapikey_integration_test.go b/pkg/detectors/vultrapikey/vultrapikey_integration_test.go new file mode 100644 index 000000000000..575713d4f005 --- /dev/null +++ b/pkg/detectors/vultrapikey/vultrapikey_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package vultrapikey + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVultrApiKey_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VULTR_API_TOKEN") + inactiveSecret := testSecrets.MustGetField("VULTR_API_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vultrapikey secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VultrApiKey, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vultrapikey secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_VultrApiKey, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("VultrApiKey.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("VultrApiKey.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/vultrapikey/vultrapikey_test.go b/pkg/detectors/vultrapikey/vultrapikey_test.go index 575713d4f005..c894f154dbda 100644 --- a/pkg/detectors/vultrapikey/vultrapikey_test.go +++ b/pkg/detectors/vultrapikey/vultrapikey_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package vultrapikey import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVultrApiKey_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors3") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VULTR_API_TOKEN") - inactiveSecret := testSecrets.MustGetField("VULTR_API_INACTIVE") +var ( + validPattern = "F7CTEQ0FK6XSPCSG24XMBULREVNG4AL3RTQK" + invalidPattern = "F7CTEQ0FK6?SPCSG24XMBULREVNG4AL3RTQK" + keyword = "vultrapikey" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVultrApiKey_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vultrapikey secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VultrApiKey, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword vultrapikey", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vultrapikey secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_VultrApiKey, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("VultrApiKey.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("VultrApiKey.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } } diff --git a/pkg/detectors/vyte/vyte_integration_test.go b/pkg/detectors/vyte/vyte_integration_test.go new file mode 100644 index 000000000000..7cce3162fdca --- /dev/null +++ b/pkg/detectors/vyte/vyte_integration_test.go @@ -0,0 +1,120 @@ +//go:build detectors +// +build detectors + +package vyte + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVyte_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VYTE") + inactiveSecret := testSecrets.MustGetField("VYTE_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vyte secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Vyte, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a vyte secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Vyte, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := Scanner{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Vyte.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("Vyte.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/vyte/vyte_test.go b/pkg/detectors/vyte/vyte_test.go index 7cce3162fdca..4fac5b388bf6 100644 --- a/pkg/detectors/vyte/vyte_test.go +++ b/pkg/detectors/vyte/vyte_test.go @@ -1,120 +1,91 @@ -//go:build detectors -// +build detectors - package vyte import ( "context" "fmt" "testing" - "time" - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/google/go-cmp/cmp" - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" ) -func TestVyte_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("VYTE") - inactiveSecret := testSecrets.MustGetField("VYTE_INACTIVE") +var ( + validPattern = "ii86dga95chsw6qe6dt0oh7nqn5udhqsk3fhaizktnv770zk81" + invalidPattern = "ii86dga95chsw6qe6dt0oh7?qn5udhqsk3fhaizktnv770zk81" + keyword = "vyte" +) - type args struct { - ctx context.Context - data []byte - verify bool - } +func TestVyte_Pattern(t *testing.T) { + d := Scanner{} + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + input string + want []string }{ { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vyte secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Vyte, - Verified: true, - }, - }, - wantErr: false, + name: "valid pattern - with keyword vyte", + input: fmt.Sprintf("%s token = '%s'", keyword, validPattern), + want: []string{validPattern}, }, { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a vyte secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_Vyte, - Verified: false, - }, - }, - wantErr: false, + name: "valid pattern - ignore duplicate", + input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern), + want: []string{validPattern}, }, { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, + name: "valid pattern - key out of prefix range", + input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern), + want: []string{}, + }, + { + name: "invalid pattern", + input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern), + want: []string{}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("Vyte.FromData() error = %v, wantErr %v", err, tt.wantErr) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input)) + if len(matchedDetectors) == 0 { + t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input) return } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("Vyte.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + + if len(results) != len(test.want) { + if len(results) == 0 { + t.Errorf("did not receive result") + } else { + t.Errorf("expected %d results, only received %d", len(test.want), len(results)) + } + return } - }) - } -} -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) + actual := make(map[string]struct{}, len(results)) + for _, r := range results { + if len(r.RawV2) > 0 { + actual[string(r.RawV2)] = struct{}{} + } else { + actual[string(r.Raw)] = struct{}{} } } + expected := make(map[string]struct{}, len(test.want)) + for _, v := range test.want { + expected[v] = struct{}{} + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("%s diff: (-want +got)\n%s", test.name, diff) + } }) } }