Skip to content

Commit

Permalink
feat(uri): update detector
Browse files Browse the repository at this point in the history
  • Loading branch information
rgmz committed Dec 21, 2024
1 parent ddc015e commit 57620fe
Showing 1 changed file with 72 additions and 40 deletions.
112 changes: 72 additions & 40 deletions pkg/detectors/uri/uri.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@ package uri

import (
"context"
"errors"
"io"
"net"
"net/http"
"net/url"
"strings"
"time"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple"
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)
Expand All @@ -19,85 +24,114 @@ type Scanner struct {
}

// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)
var _ detectors.CustomFalsePositiveChecker = (*Scanner)(nil)
var _ interface {
detectors.Detector
detectors.CustomFalsePositiveChecker
} = (*Scanner)(nil)

var (
keyPat = regexp.MustCompile(`\b(?:https?:)?\/\/[\S]{3,50}:([\S]{3,50})@[-.%\w\/:]+\b`)

// TODO: make local addr opt-out
defaultClient = detectors.DetectorHttpClientWithNoLocalAddresses

hostNotFoundCache = simple.NewCache[struct{}]()
)

// Keywords are used for efficiently pre-filtering chunks.
// Use identifiers in the secret preferably, or the provider name.
func (s Scanner) Keywords() []string {
return []string{"http"}
return []string{"http://", "https://"}
}

func (s Scanner) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_URI
}

func (s Scanner) Description() string {
return "This detector identifies URLs with embedded credentials, which can be used to access web resources without explicit user interaction."
}

// FromData will find and optionally verify URI secrets in a given set of bytes.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
logger := logContext.AddLogger(ctx).Logger().WithName("uri")
dataStr := string(data)

matches := keyPat.FindAllStringSubmatch(dataStr, -1)

for _, match := range matches {

uriMatches := make(map[string]string)
for _, matches := range keyPat.FindAllStringSubmatch(dataStr, -1) {
uriMatch := matches[0]
if !s.allowKnownTestSites {
if strings.Contains(match[0], "httpbin.org") {
if strings.Contains(uriMatch, "httpbin.org") {
continue
}
if strings.Contains(match[0], "httpwatch.com") {
if strings.Contains(uriMatch, "httpwatch.com") {
continue
}
}

urlMatch := match[0]
password := match[1]

password := matches[1]
// Skip findings where the password only has "*" characters, this is a redacted password
// Also include the url encoded "*" characters: "%2A"
if strings.Trim(password, "*") == "" || strings.Trim(password, "%2A") == "" {
continue
}

parsedURL, err := url.Parse(urlMatch)
uriMatches[uriMatch] = password
}

for uri, password := range uriMatches {
parsedURL, err := url.Parse(uri)
if err != nil {
// URL is invalid.
continue
}
// URL does not contain a password.
if _, ok := parsedURL.User.Password(); !ok {
continue
}

rawURL, _ := url.Parse(urlMatch)
rawURLStr := rawURL.String()
// Safe, I think? (https://github.com/golang/go/issues/38351)
rawUrl := *parsedURL
rawUrlWithPath := rawUrl.String()
// Removing the path causes possible deduplication issues if some paths have basic auth and some do not.
rawURL.Path = ""
rawUrl.Path = ""
rawUrlWithoutPath := rawUrl.String()

s1 := detectors.Result{
r := detectors.Result{
DetectorType: detectorspb.DetectorType_URI,
Raw: []byte(rawURL.String()),
RawV2: []byte(rawURLStr),
Redacted: detectors.RedactURL(*rawURL),
Raw: []byte(rawUrlWithoutPath),
RawV2: []byte(rawUrlWithPath),
Redacted: detectors.RedactURL(*parsedURL),
}

if verify {
hostname := parsedURL.Hostname()
if hostNotFoundCache.Exists(hostname) {
logger.V(3).Info("Skipping uri: no such host", "host", hostname)
continue
}

if s.client == nil {
s.client = defaultClient
}
isVerified, verificationError := verifyURL(ctx, s.client, parsedURL)
s1.Verified = isVerified
s1.SetVerificationError(verificationError, password)
isVerified, vErr := verifyURL(ctx, s.client, parsedURL)
r.Verified = isVerified
if vErr != nil {
var dnsErr *net.DNSError
if errors.As(vErr, &dnsErr) && dnsErr.IsNotFound {
hostNotFoundCache.Set(hostname, struct{}{})
}
r.SetVerificationError(vErr, password)
}
}

if !s1.Verified {
if !r.Verified {
// Skip unverified findings where the password starts with a `$` - it's almost certainly a variable.
if strings.HasPrefix(password, "$") {
continue
}
}

results = append(results, s1)
results = append(results, r)
}

return results, nil
Expand All @@ -118,16 +152,19 @@ func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, erro
u.User = nil
nonCredentialedURL := u.String()

req, err := http.NewRequestWithContext(ctx, "GET", credentialedURL, nil)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, credentialedURL, nil)
if err != nil {
return false, err
}
req = req.WithContext(ctx)

credentialedRes, err := client.Do(req)
if err != nil {
return false, err
}
credentialedRes.Body.Close()
defer func() {
_, _ = io.Copy(io.Discard, credentialedRes.Body)
_ = credentialedRes.Body.Close()
}()

// If the credentialed URL returns a non 2XX code, we can assume it's a false positive.
if credentialedRes.StatusCode < 200 || credentialedRes.StatusCode > 299 {
Expand All @@ -136,16 +173,19 @@ func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, erro

time.Sleep(time.Millisecond * 10)

req, err = http.NewRequestWithContext(ctx, "GET", nonCredentialedURL, nil)
req, err = http.NewRequestWithContext(ctx, http.MethodGet, nonCredentialedURL, nil)
if err != nil {
return false, err
}
req = req.WithContext(ctx)

nonCredentialedRes, err := client.Do(req)
if err != nil {
return false, err
}
nonCredentialedRes.Body.Close()
defer func() {
_, _ = io.Copy(io.Discard, nonCredentialedRes.Body)
_ = nonCredentialedRes.Body.Close()
}()

// If the non-credentialed URL returns a non 400-428 code and basic auth header, we can assume it's verified now.
if nonCredentialedRes.StatusCode >= 400 && nonCredentialedRes.StatusCode < 429 {
Expand All @@ -156,11 +196,3 @@ func verifyURL(ctx context.Context, client *http.Client, u *url.URL) (bool, erro

return false, nil
}

func (s Scanner) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_URI
}

func (s Scanner) Description() string {
return "This detector identifies URLs with embedded credentials, which can be used to access web resources without explicit user interaction."
}

0 comments on commit 57620fe

Please sign in to comment.