// WhitespaceChars lists the standard whitespace characters (the Unicode code
// points that carry the White_Space property), one single-character string
// per entry. RemoveAllWhiteSpaces strips every entry of this slice.
var WhitespaceChars = []string{
	// Standard whitespace characters
	"\u0009", // Character tabulation
	"\u000A", // Line feed
	"\u000B", // Line tabulation
	"\u000C", // Form feed
	"\u000D", // Carriage return
	"\u0020", // Space
	"\u0085", // Next line
	"\u00A0", // No-break space
	"\u1680", // Ogham space mark
	"\u2000", // En quad
	"\u2001", // Em quad
	"\u2002", // En space
	"\u2003", // Em space
	"\u2004", // Three-per-em space
	"\u2005", // Four-per-em space
	"\u2006", // Six-per-em space
	"\u2007", // Figure space
	"\u2008", // Punctuation space
	"\u2009", // Thin space
	"\u200A", // Hair space
	"\u2028", // Line separator
	"\u2029", // Paragraph separator
	"\u202F", // Narrow no-break space
	"\u205F", // Medium mathematical space
	"\u3000", // Ideographic space
}

// NonOfficialWhitespaceChars lists invisible / zero-width characters that are
// not whitespace according to Unicode (White_Space=no). They are NOT removed
// by RemoveAllWhiteSpaces; callers that want them gone must strip them
// explicitly.
var NonOfficialWhitespaceChars = []string{
	// Characters with property White_Space=no
	"\u180E", // Mongolian vowel separator
	"\u200B", // Zero width space
	"\u200C", // Zero width non-joiner
	"\u200D", // Zero width joiner
	"\u2060", // Word joiner
	"\uFEFF", // Zero width no-break space
}

// RemoveAllWhiteSpaces returns s with every occurrence of each entry of
// WhitespaceChars removed, wherever it appears in the string (not only at the
// ends). Characters listed in NonOfficialWhitespaceChars are left untouched.
func RemoveAllWhiteSpaces(s string) string {
	cleanedString := s
	// Space and tab are already part of WhitespaceChars, so no separate
	// pre-pass for them is needed.
	for _, whitespaceChar := range WhitespaceChars {
		cleanedString = strings.ReplaceAll(cleanedString, whitespaceChar, "")
	}
	return cleanedString
}
string) string { diff --git a/utils/utils.go b/utils/utils.go index f0a27b92ac5730717bdda9fd4fe914e415e2585b..042c228ff8545de2e408bd3aa6877c3dc26f8cd8 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -2,10 +2,10 @@ package utils import ( "bytes" + emailverifier "github.com/AfterShip/email-verifier" "github.com/mohae/deepcopy" "gitlab.bob.co.za/bob-public-utils/bobgroup-go-utils/errors" "gitlab.bob.co.za/bob-public-utils/bobgroup-go-utils/string_utils" - "net/mail" "net/url" "os" "reflect" @@ -55,23 +55,68 @@ func PointerToValue[V any](value *V) V { } func ValidateEmailAddress(email string) (string, error) { - if email == "" { + // To lower + cleanedEmail := strings.ToLower(strings.TrimSpace(email)) + + // Remove all whitespaces + cleanedEmail = string_utils.RemoveAllWhiteSpaces(cleanedEmail) + + // Also remove unofficial whitespaces + for _, char := range string_utils.NonOfficialWhitespaceChars { + cleanedEmail = strings.ReplaceAll(cleanedEmail, char, "") + } + + // Strip invalid characters + cleanedEmail = stripInvalidCharacters(cleanedEmail) + + // Make sure the email is not empty + if cleanedEmail == "" { return "", errors.Error("email address is empty") } - cleanEmail := strings.ToLower(strings.TrimSpace(email)) - cleanEmail = string_utils.RemoveAllWhiteSpaces(cleanEmail) + // Parse and verify the email + verifier := emailverifier.NewVerifier() + result, err := verifier.Verify(cleanedEmail) + if err != nil || !result.Syntax.Valid { + return cleanedEmail, errors.Wrap(err, "could not parse email address") + } - // Remove ZWSP ("\u200B") characters with an empty string to remove it - cleanEmail = strings.ReplaceAll(cleanEmail, "\u200B", "") + return cleanedEmail, nil +} - // We validate it but still return it since in some cases we don't want to break everything if the email is bad - _, err := mail.ParseAddress(cleanEmail) - if err != nil { - return cleanEmail, errors.Wrap(err, "could not parse email address") +func stripInvalidCharacters(email string) string { 
+ cleanEmail := email + + // Replace quotes, asterisks, etc. + cleanEmail = strings.ReplaceAll(cleanEmail, "'", "") + cleanEmail = strings.ReplaceAll(cleanEmail, "*", "") + cleanEmail = strings.ReplaceAll(cleanEmail, "!", "") + cleanEmail = strings.ReplaceAll(cleanEmail, "+", "") + + // Trim invalid characters, like underscore, so that it still fails if it's inside the email + cleanEmail = strings.Trim(cleanEmail, "_") + + return cleanEmail +} + +func SplitAndCleanEmailAddresses(emails string) []string { + var destinationEmails []string + + splitEmails := string_utils.SplitString(emails, []rune{',', ';'}) + if len(splitEmails) >= 1 { + // Success - return these emails + for _, email := range splitEmails { + cleanedEmail, err := ValidateEmailAddress(email) + if err == nil && cleanedEmail != "" { + destinationEmails = append(destinationEmails, cleanedEmail) + } + } + if len(destinationEmails) > 0 { + return destinationEmails + } } - return cleanEmail, nil + return destinationEmails } func StripEmail(email string) (strippedEmail string, strippedDomain string) {