Skip to content

Commit

Permalink
improve system and role detection
Browse files Browse the repository at this point in the history
  • Loading branch information
mattbr0wn authored and alexopenline committed Nov 7, 2024
1 parent 72ed8d3 commit c6b81e6
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 80 deletions.
3 changes: 3 additions & 0 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,8 @@ dist:

aws s3 cp ./install.py $CLOUDFLARE_R2_BUCKET/install.py --endpoint-url $CLOUDFLARE_R2_ENDPOINT

run *ARGS:
go run main.go {{ARGS}}

test:
go test ./...
9 changes: 7 additions & 2 deletions mailvalidate/free_email.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@ type FreeEmails struct {
FreeEmailList []string `toml:"free_emails"`
}

func IsFreeEmailCheck(email string, freeEmails *FreeEmails) (bool, error) {
func IsFreeEmailCheck(email string) (bool, error) {
freeEmails, err := getFreeEmailList()
if err != nil {
return false, err
}

_, domain, ok := syntax.GetEmailUserAndDomain(email)
if !ok {
return false, fmt.Errorf("Not a valid email address")
Expand All @@ -29,7 +34,7 @@ func IsFreeEmailCheck(email string, freeEmails *FreeEmails) (bool, error) {
return false, nil
}

func GetFreeEmailList() (FreeEmails, error) {
func getFreeEmailList() (FreeEmails, error) {
var freeEmails FreeEmails

// Read the file
Expand Down
9 changes: 7 additions & 2 deletions mailvalidate/role_account.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@ type RoleAccounts struct {
Matches []string `toml:"matches"`
}

func IsRoleAccountCheck(email string, roleAccounts *RoleAccounts) (bool, error) {
func IsRoleAccountCheck(email string) (bool, error) {
roleAccounts, err := getRoleAccounts()
if err != nil {
return false, err
}

user, _, ok := syntax.GetEmailUserAndDomain(email)
if !ok {
return false, fmt.Errorf("Not a valid email address")
Expand All @@ -38,7 +43,7 @@ func IsRoleAccountCheck(email string, roleAccounts *RoleAccounts) (bool, error)
return false, nil
}

func GetRoleAccounts() (RoleAccounts, error) {
func getRoleAccounts() (RoleAccounts, error) {
var roleAccounts RoleAccounts

// Read the file
Expand Down
2 changes: 2 additions & 0 deletions mailvalidate/role_emails.toml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ matches = [
]

contains = [
'-leave',
'abuse',
'academy',
'access',
Expand Down Expand Up @@ -765,6 +766,7 @@ contains = [
'secretaris',
'secretary',
'security',
'seedcamp',
'sekretariat',
'seminar',
'seniors',
Expand Down
122 changes: 80 additions & 42 deletions mailvalidate/system_generated.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@ import (
"unicode"
)

// IsSystemGeneratedUser checks if a username appears to be system generated
func IsSystemGeneratedUser(user string) bool {
if isNumeric(user) || isRandomUsername(user) {
return true
}
return false
return isNumeric(user) || isRandomUsername(user)
}

func isNumeric(s string) bool {
Expand All @@ -23,71 +21,110 @@ func isNumeric(s string) bool {
}

func isRandomUsername(username string) bool {
// Check if the username contains only allowed characters
// Basic validation for allowed characters in email usernames
allowedChars := regexp.MustCompile(`^[a-zA-Z0-9.=_+!#$%&'*+/=?^_{|}~-]+$`)
if !allowedChars.MatchString(username) {
return false
}

// Check for patterns with many numbers and dashes
numDashPattern := regexp.MustCompile(`(\d+-){3,}|\d{5,}`)
if numDashPattern.MatchString(username) {
return true
// Skip common name patterns (initials/name with numbers)
namePattern := regexp.MustCompile(`^[a-z]+\.[a-z]+\d{1,4}$`)
if namePattern.MatchString(username) && len(username) < 20 {
return false
}

// Check for long hexadecimal-like strings
hexPattern := regexp.MustCompile(`^[a-f0-9]{10,}$`)
if hexPattern.MatchString(username) {
return true
// Common system-generated patterns - check these before entropy
systemPatterns := []*regexp.Regexp{
// ld- and usr- patterns
regexp.MustCompile(`^(ld|usr)-[a-z0-9]{8,}$`),
// Unsubscribe patterns
regexp.MustCompile(`^unsub-[a-f0-9]{8}`),
regexp.MustCompile(`^[0-9]+\.[a-z0-9]{30,}`),
// UUID patterns
regexp.MustCompile(`[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}`),
// System email prefixes
regexp.MustCompile(`^(bounce|return|system|noreply|no-reply|donotreply|do-not-reply|unsubscribe)[-.][a-z0-9]`),
// Common prefixes with random strings
regexp.MustCompile(`^(usr|user|tmp|temp|random)-[a-z0-9]{8,}$`),
}

// Check for multiple segments separated by dots with numbers
segments := strings.Split(username, ".")
numericSegments := 0
for _, segment := range segments {
if regexp.MustCompile(`^\d+$`).MatchString(segment) {
numericSegments++
for _, pattern := range systemPatterns {
if pattern.MatchString(username) {
return true
}
}
if numericSegments >= 3 {

// Check entropy for non-name-like patterns
if isHighEntropy(username) && len(username) > 12 {
return true
}

// Check for UUID-like patterns (including those with prefixes)
uuidPattern := regexp.MustCompile(`[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}`)
if uuidPattern.MatchString(username) {
// Quick checks for obvious patterns
if strings.Count(username, "_") > 2 ||
strings.Contains(username, "=") ||
strings.Contains(username, "--") ||
len(username) >= 40 {
return true
}

// Check for long random string followed by a more structured part
randomStructuredPattern := regexp.MustCompile(`^[a-z0-9]{20,}[-=][a-z0-9._-]+$`)
if randomStructuredPattern.MatchString(username) {
return true
// Check if string after hyphen is random-looking
parts := strings.Split(username, "-")
if len(parts) == 2 && len(parts[1]) >= 8 {
return isHighEntropy(parts[1])
}

// Check for email aliases with random strings
aliasPattern := regexp.MustCompile(`^[a-zA-Z0-9._%+-]+\+[a-zA-Z0-9]{8,}$`)
if aliasPattern.MatchString(username) {
return true
// Check for multiple numeric segments
segments := strings.Split(username, ".")
numericSegments := 0
for _, segment := range segments {
if regexp.MustCompile(`^\d+$`).MatchString(segment) {
numericSegments++
}
}

// New pattern: Check for short random alphanumeric strings
shortRandomPattern := regexp.MustCompile(`^[a-z0-9]{6,10}$`)
if shortRandomPattern.MatchString(username) {
return true
return numericSegments >= 3
}

func isHighEntropy(s string) bool {
if len(s) < 8 {
return false
}

if strings.Count(username, "_") > 2 {
return true
// Skip if it looks like a name with numbers but ensure we don't skip system patterns
nameWithNumbersPattern := regexp.MustCompile(`^[a-z]+\.?[a-z]*\d{1,4}$`)
if nameWithNumbersPattern.MatchString(s) && len(s) < 20 {
return false
}

if strings.Contains(username, "=") ||
strings.Contains(username, "--") {
return true
charMap := make(map[rune]bool)
consecutiveSame := 1
maxConsecutive := 1
var lastChar rune

for i, char := range s {
charMap[char] = true

if i > 0 {
if char == lastChar {
consecutiveSame++
if consecutiveSame > maxConsecutive {
maxConsecutive = consecutiveSame
}
} else {
consecutiveSame = 1
}
}
lastChar = char
}

// If none of the above patterns match, it's likely not a random username
return false
uniqueRatio := float64(len(charMap)) / float64(len(s))
consecutiveRatio := float64(maxConsecutive) / float64(len(s))
transitions := countTransitions(s)

return ((uniqueRatio > 0.6 && len(s) > 12) ||
(uniqueRatio > 0.7 && len(s) >= 8) ||
transitions > 4) &&
consecutiveRatio < 0.3
}

func countTransitions(s string) int {
Expand Down Expand Up @@ -119,3 +156,4 @@ func charTypeCheck(r rune) charType {
}
return otherType
}

52 changes: 18 additions & 34 deletions mailvalidate/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,25 +109,13 @@ func ValidateEmailSyntax(email string) SyntaxValidation {
IsSystemGenerated: IsSystemGeneratedUser(user),
}

freeEmails, err := GetFreeEmailList()
if err != nil {
returnedSyntaxValidation.Error = fmt.Sprintf("Error getting free email list: %s", err.Error())
return returnedSyntaxValidation
}

roleAccounts, err := GetRoleAccounts()
if err != nil {
returnedSyntaxValidation.Error = fmt.Sprintf("Error getting role accounts list: %s", err.Error())
return returnedSyntaxValidation
}

isFreeEmail, err := IsFreeEmailCheck(cleanEmail, &freeEmails)
isFreeEmail, err := IsFreeEmailCheck(cleanEmail)
if err != nil {
returnedSyntaxValidation.Error = fmt.Sprintf("Error running free email check: %s", err.Error())
return returnedSyntaxValidation
}

isRoleAccount, err := IsRoleAccountCheck(cleanEmail, &roleAccounts)
isRoleAccount, err := IsRoleAccountCheck(cleanEmail)
if err != nil {
returnedSyntaxValidation.Error = fmt.Sprintf("Error running role account check: %s", err.Error())
return returnedSyntaxValidation
Expand Down Expand Up @@ -171,14 +159,22 @@ func ValidateDomainWithCustomKnownProviders(validationRequest EmailValidationReq

results.IsPrimaryDomain, results.PrimaryDomain = domaincheck.PrimaryDomainCheck(domain)

catchAllResults := catchAllTest(&validationRequest)

if catchAllResults.IsDeliverable == "true" {
results.IsCatchAll = true
isFreeEmail, err := IsFreeEmailCheck(validationRequest.Email)
if err != nil {
results.Error = fmt.Sprintf("Error running free email check: %v", err)
return results
}

results.MailServerHealth = catchAllResults.MailServerHealth
results.SmtpResponse = catchAllResults.SmtpResponse
if !isFreeEmail {
catchAllResults := catchAllTest(&validationRequest)

if catchAllResults.IsDeliverable == "true" {
results.IsCatchAll = true
}

results.MailServerHealth = catchAllResults.MailServerHealth
results.SmtpResponse = catchAllResults.SmtpResponse
}

return results
}
Expand All @@ -204,28 +200,16 @@ func ValidateEmail(validationRequest EmailValidationRequest) EmailValidation {
return results
}

freeEmails, err := GetFreeEmailList()
if err != nil {
results.Error = fmt.Sprintf("Error getting free email list: %v", err)
return results
}

roleAccounts, err := GetRoleAccounts()
if err != nil {
results.Error = fmt.Sprintf("Error getting role accounts list: %v", err)
return results
}

email := emailSyntaxResult.CleanEmail

isFreeEmail, err := IsFreeEmailCheck(email, &freeEmails)
isFreeEmail, err := IsFreeEmailCheck(email)
if err != nil {
results.Error = fmt.Sprintf("Error running free email check: %v", err)
return results
}
results.IsFreeAccount = isFreeEmail

isRoleAccount, err := IsRoleAccountCheck(email, &roleAccounts)
isRoleAccount, err := IsRoleAccountCheck(email)
if err != nil {
results.Error = fmt.Sprintf("Error running role account check: %v", err)
return results
Expand Down

0 comments on commit c6b81e6

Please sign in to comment.