From 57620fe1dfa3bb2e06f84c69ecddbe9762873777 Mon Sep 17 00:00:00 2001 From: Richard Gomez Date: Thu, 21 Nov 2024 18:47:40 -0500 Subject: [PATCH] feat(uri): update detector --- pkg/detectors/uri/uri.go | 112 +++++++++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 40 deletions(-) diff --git a/pkg/detectors/uri/uri.go b/pkg/detectors/uri/uri.go index f97f3198ad1c..111869ea9a8c 100644 --- a/pkg/detectors/uri/uri.go +++ b/pkg/detectors/uri/uri.go @@ -2,6 +2,9 @@ package uri import ( "context" + "errors" + "io" + "net" "net/http" "net/url" "strings" @@ -9,6 +12,8 @@ import ( regexp "github.com/wasilibs/go-re2" + "github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple" + logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) @@ -19,85 +24,114 @@ type Scanner struct { } // Ensure the Scanner satisfies the interface at compile time. -var _ detectors.Detector = (*Scanner)(nil) -var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil) +var _ interface { + detectors.Detector + detectors.CustomFalsePositiveChecker +} = (*Scanner)(nil) var ( keyPat = regexp.MustCompile(`\b(?:https?:)?\/\/[\S]{3,50}:([\S]{3,50})@[-.%\w\/:]+\b`) // TODO: make local addr opt-out defaultClient = detectors.DetectorHttpClientWithNoLocalAddresses + + hostNotFoundCache = simple.NewCache[struct{}]() ) // Keywords are used for efficiently pre-filtering chunks. // Use identifiers in the secret preferably, or the provider name. func (s Scanner) Keywords() []string { - return []string{"http"} + return []string{"http://", "https://"} +} + +func (s Scanner) Type() detectorspb.DetectorType { + return detectorspb.DetectorType_URI +} + +func (s Scanner) Description() string { + return "This detector identifies URLs with embedded credentials, which can be used to access web resources without explicit user interaction." } // FromData will find and optionally verify URI secrets in a given set of bytes. func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { + logger := logContext.AddLogger(ctx).Logger().WithName("uri") dataStr := string(data) - matches := keyPat.FindAllStringSubmatch(dataStr, -1) - - for _, match := range matches { - + uriMatches := make(map[string]string) + for _, matches := range keyPat.FindAllStringSubmatch(dataStr, -1) { + uriMatch := matches[0] if !s.allowKnownTestSites { - if strings.Contains(match[0], "httpbin.org") { + if strings.Contains(uriMatch, "httpbin.org") { continue } - if strings.Contains(match[0], "httpwatch.com") { + if strings.Contains(uriMatch, "httpwatch.com") { continue } } - urlMatch := match[0] - password := match[1] - + password := matches[1] // Skip findings where the password only has "*" characters, this is a redacted password // Also include the url encoded "*" characters: "%2A" if strings.Trim(password, "*") == "" || strings.Trim(password, "%2A") == "" { continue } - parsedURL, err := url.Parse(urlMatch) + uriMatches[uriMatch] = password + } + + for uri, password := range uriMatches { + parsedURL, err := url.Parse(uri) if err != nil { + // URL is invalid. continue } + // URL does not contain a password. if _, ok := parsedURL.User.Password(); !ok { continue } - rawURL, _ := url.Parse(urlMatch) - rawURLStr := rawURL.String() + // Safe, I think? (https://github.com/golang/go/issues/38351) + rawUrl := *parsedURL + rawUrlWithPath := rawUrl.String() // Removing the path causes possible deduplication issues if some paths have basic auth and some do not. - rawURL.Path = "" + rawUrl.Path = "" + rawUrlWithoutPath := rawUrl.String() - s1 := detectors.Result{ + r := detectors.Result{ DetectorType: detectorspb.DetectorType_URI, - Raw: []byte(rawURL.String()), - RawV2: []byte(rawURLStr), - Redacted: detectors.RedactURL(*rawURL), + Raw: []byte(rawUrlWithoutPath), + RawV2: []byte(rawUrlWithPath), + Redacted: detectors.RedactURL(*parsedURL), } if verify { + hostname := parsedURL.Hostname() + if hostNotFoundCache.Exists(hostname) { + logger.V(3).Info("Skipping uri: no such host", "host", hostname) + continue + } + if s.client == nil { s.client = defaultClient } - isVerified, verificationError := verifyURL(ctx, s.client, parsedURL) - s1.Verified = isVerified - s1.SetVerificationError(verificationError, password) + isVerified, vErr := verifyURL(ctx, s.client, parsedURL) + r.Verified = isVerified + if vErr != nil { + var dnsErr *net.DNSError + if errors.As(vErr, &dnsErr) && dnsErr.IsNotFound { + hostNotFoundCache.Set(hostname, struct{}{}) + } + r.SetVerificationError(vErr, password) + } } - if !s1.Verified { + if !r.Verified { // Skip unverified findings where the password starts with a `$` - it's almost certainly a variable. if strings.HasPrefix(password, "$") { continue } } - - results = append(results, s1) + results = append(results, r) } return results, nil @@ -118,16 +152,19 @@ func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, erro u.User = nil nonCredentialedURL := u.String() - req, err := http.NewRequestWithContext(ctx, "GET", credentialedURL, nil) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, credentialedURL, nil) if err != nil { return false, err } - req = req.WithContext(ctx) + credentialedRes, err := client.Do(req) if err != nil { return false, err } - credentialedRes.Body.Close() + defer func() { + _, _ = io.Copy(io.Discard, credentialedRes.Body) + _ = credentialedRes.Body.Close() + }() // If the credentialed URL returns a non 2XX code, we can assume it's a false positive. if credentialedRes.StatusCode < 200 || credentialedRes.StatusCode > 299 { @@ -136,16 +173,19 @@ func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, erro time.Sleep(time.Millisecond * 10) - req, err = http.NewRequestWithContext(ctx, "GET", nonCredentialedURL, nil) + req, err = http.NewRequestWithContext(ctx, http.MethodGet, nonCredentialedURL, nil) if err != nil { return false, err } - req = req.WithContext(ctx) + nonCredentialedRes, err := client.Do(req) if err != nil { return false, err } - nonCredentialedRes.Body.Close() + defer func() { + _, _ = io.Copy(io.Discard, nonCredentialedRes.Body) + _ = nonCredentialedRes.Body.Close() + }() // If the non-credentialed URL returns a non 400-428 code and basic auth header, we can assume it's verified now. if nonCredentialedRes.StatusCode >= 400 && nonCredentialedRes.StatusCode < 429 { @@ -156,11 +196,3 @@ func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, erro return false, nil } - -func (s Scanner) Type() detectorspb.DetectorType { - return detectorspb.DetectorType_URI -} - -func (s Scanner) Description() string { - return "This detector identifies URLs with embedded credentials, which can be used to access web resources without explicit user interaction." -}