From 54b82bb02d18fd783a7760e13178b957f518e6eb Mon Sep 17 00:00:00 2001
From: jano3 <jano@bob.co.za>
Date: Fri, 3 Nov 2023 07:11:03 +0200
Subject: [PATCH] Hide byte arrays when sanitising json strings

---
Review note: this patch was amended after review. ReplaceAllRegexStringSubmatch now leaves a match untouched when
its capturing group did not take part in the match (regexp reports index -1 for it), instead of slicing with -1 and
panicking, e.g. on {"file_bytes": []}. It also builds the result in a byte buffer rather than by string concatenation.

 logs/logs.go                 | 35 +++++++++++++++++------------------
 string_utils/string_utils.go | 39 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/logs/logs.go b/logs/logs.go
index 99aa65e..77f8662 100644
--- a/logs/logs.go
+++ b/logs/logs.go
@@ -30,33 +30,32 @@ var build string
 var raygunClient *raygun4go.Client
 
 // Password filtering
-var passwordRegex = regexp.MustCompile(`(?i:\\?"password\\?"\s*:\s*\\?"(.*)\\?",).*`)
+var passwordRegex = regexp.MustCompile(`(?i:\\?"password\\?"\s*:\s*\\?"(.*)\\?").*`)
+var byteArrayRegex = regexp.MustCompile(`(?i:\\?"(?i:[\w]*)byte(?i:[\w]*)\\?"\s*:\s*\[([\d\s,]+)*\])`)
 
 func SanitiseLogs(logString string) string {
+	var isValidJsonString bool
+	isValidJsonString, logString = string_utils.PrettyJSON(logString)
+	if !isValidJsonString {
+		return logString
+	}
+
+	logString = MaskByteArraysInJsonString(logString)
 	logString = MaskPasswordsInJsonString(logString)
 
 	return logString
 }
 
-// MaskPasswordsInJsonString takes a string and, if it is a JSON string, sanitises all the password. In order for the
-// regex to work correctly we need to prettify the JSON, so the function always returns a formatted JSON string.
+// MaskPasswordsInJsonString takes a string and sanitises all the instances of fields named password.
+// E.g. "{"password": "xyz123"}" will become "{"password": "***"}"
 func MaskPasswordsInJsonString(jsonString string) string {
-	var isValidJsonString bool
-	isValidJsonString, jsonString = string_utils.PrettyJSON(jsonString)
-	if !isValidJsonString {
-		return jsonString
-	}
-
-	if passwordRegex.MatchString(jsonString) {
-		result := passwordRegex.FindAllStringSubmatch(jsonString, -1)
-		for _, match := range result {
-			if len(match) > 1 {
-				jsonString = strings.ReplaceAll(jsonString, match[1], "***")
-			}
-		}
-	}
+	return string_utils.ReplaceAllRegexStringSubmatch(passwordRegex, jsonString, "***")
+}
 
-	return jsonString
+// MaskByteArraysInJsonString takes a string and truncates all the instances of number array fields that have the
+// word "byte" in the name. E.g. {"file_bytes": [123,68,103]} will become "{"file_bytes": [...]}"
+func MaskByteArraysInJsonString(jsonString string) string {
+	return string_utils.ReplaceAllRegexStringSubmatch(byteArrayRegex, jsonString, "...")
 }
 
 func SanitiseFields(fields map[string]interface{}) map[string]interface{} {
diff --git a/string_utils/string_utils.go b/string_utils/string_utils.go
index 3956820..efa8731 100644
--- a/string_utils/string_utils.go
+++ b/string_utils/string_utils.go
@@ -16,7 +16,14 @@ import (
 	"golang.org/x/text/unicode/norm"
 )
 
-const snakeCasePattern = `[a-z]([a-z0-9_]*[a-z0-9])*`
+const (
+	snakeCasePattern = `[a-z]([a-z0-9_]*[a-z0-9])*`
+
+	regexIndexMatchStart    = 0
+	regexIndexMatchEnd      = 1
+	regexIndexSubmatchStart = 2
+	regexIndexSubmatchEnd   = 3
+)
 
 var snakeCaseRegex = regexp.MustCompile("^" + snakeCasePattern + "$")
 
@@ -49,6 +56,36 @@ func ReplaceCaseInsensitive(string, toReplace, replaceWith string) string {
 	return regex.ReplaceAllString(string, replaceWith)
 }
 
+// ReplaceAllRegexStringSubmatch finds the submatches for a regular expression that has a single capturing group and
+// replaces all the submatches (i.e. the part that matches the capturing group) with replaceWith. Everything outside
+// the capturing group, including the rest of each match, is copied through unchanged.
+// E.g. the regular expression re = a(x*)b captures any number of x's that are between an a and b.
+// ReplaceAllRegexStringSubmatch(re, "-axxb-ab-axb-x-ax-xb-ba-", "?") will result in "-a?b-a?b-a?b-x-ax-xb-ba-"
+//
+// Matches are left untouched when the regular expression does not have exactly one capturing group, or when the
+// group did not take part in the match (e.g. an optional group such as (x+)* that matched nothing). The regexp
+// package reports such a group with index -1, which must never be used to slice s.
+func ReplaceAllRegexStringSubmatch(re *regexp.Regexp, s string, replaceWith string) string {
+	// Collect the output in a byte buffer so that the cost stays linear in the number of matches.
+	result := make([]byte, 0, len(s))
+	lastIndex := 0
+
+	for _, v := range re.FindAllStringSubmatchIndex(s, -1) {
+		if len(v) == regexIndexSubmatchEnd+1 && v[regexIndexSubmatchStart] >= 0 {
+			// One participating submatch - copy the text before it, replaceWith, and then the rest of the match
+			result = append(result, s[lastIndex:v[regexIndexSubmatchStart]]...)
+			result = append(result, replaceWith...)
+			result = append(result, s[v[regexIndexSubmatchEnd]:v[regexIndexMatchEnd]]...)
+		} else {
+			// No usable submatch - copy the match through unchanged
+			result = append(result, s[lastIndex:v[regexIndexMatchEnd]]...)
+		}
+		lastIndex = v[regexIndexMatchEnd]
+	}
+
+	return string(append(result, s[lastIndex:]...))
+}
+
 // TrimQuotes - trims quotes from a string (ie: "foo" will return foo)
 func TrimQuotes(stringToTrim string) string {
 	if len(stringToTrim) >= 2 {
-- 
GitLab