From 54b82bb02d18fd783a7760e13178b957f518e6eb Mon Sep 17 00:00:00 2001
From: jano3 <jano@bob.co.za>
Date: Fri, 3 Nov 2023 07:11:03 +0200
Subject: [PATCH] Hide byte arrays when sanitising json strings

---
 logs/logs.go                 | 35 +++++++++++++++++------------------
 string_utils/string_utils.go | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/logs/logs.go b/logs/logs.go
index 99aa65e..77f8662 100644
--- a/logs/logs.go
+++ b/logs/logs.go
@@ -30,33 +30,32 @@ var build string
 var raygunClient *raygun4go.Client
 
 // Password filtering
-var passwordRegex = regexp.MustCompile(`(?i:\\?"password\\?"\s*:\s*\\?"(.*)\\?",).*`)
+var passwordRegex = regexp.MustCompile(`(?i:\\?"password\\?"\s*:\s*\\?"(.*)\\?").*`)
+var byteArrayRegex = regexp.MustCompile(`(?i:\\?"(?i:[\w]*)byte(?i:[\w]*)\\?"\s*:\s*\[([\d\s,]+)*\])`)
 
 func SanitiseLogs(logString string) string {
+	var isValidJsonString bool
+	isValidJsonString, logString = string_utils.PrettyJSON(logString)
+	if !isValidJsonString {
+		return logString
+	}
+
+	logString = MaskByteArraysInJsonString(logString)
 	logString = MaskPasswordsInJsonString(logString)
 
 	return logString
 }
 
-// MaskPasswordsInJsonString takes a string and, if it is a JSON string, sanitises all the password. In order for the
-// regex to work correctly we need to prettify the JSON, so the function always returns a formatted JSON string.
+// MaskPasswordsInJsonString takes a string and sanitises all the instances of fields named password.
+// E.g. "{"password": "xyz123"}" will become "{"password": "***"}"
 func MaskPasswordsInJsonString(jsonString string) string {
-	var isValidJsonString bool
-	isValidJsonString, jsonString = string_utils.PrettyJSON(jsonString)
-	if !isValidJsonString {
-		return jsonString
-	}
-
-	if passwordRegex.MatchString(jsonString) {
-		result := passwordRegex.FindAllStringSubmatch(jsonString, -1)
-		for _, match := range result {
-			if len(match) > 1 {
-				jsonString = strings.ReplaceAll(jsonString, match[1], "***")
-			}
-		}
-	}
+	return string_utils.ReplaceAllRegexStringSubmatch(passwordRegex, jsonString, "***")
+}
 
-	return jsonString
+// MaskByteArraysInJsonString takes a string and truncates all the instances of number array fields that have the word
+// "byte" in the name. E.g. "{"file_bytes": [123,68,103]}" will become "{"file_bytes": [...]}"
+func MaskByteArraysInJsonString(jsonString string) string {
+	return string_utils.ReplaceAllRegexStringSubmatch(byteArrayRegex, jsonString, "...")
 }
 
 func SanitiseFields(fields map[string]interface{}) map[string]interface{} {
diff --git a/string_utils/string_utils.go b/string_utils/string_utils.go
index 3956820..efa8731 100644
--- a/string_utils/string_utils.go
+++ b/string_utils/string_utils.go
@@ -16,7 +16,14 @@ import (
 	"golang.org/x/text/unicode/norm"
 )
 
-const snakeCasePattern = `[a-z]([a-z0-9_]*[a-z0-9])*`
+const (
+	snakeCasePattern = `[a-z]([a-z0-9_]*[a-z0-9])*`
+
+	regexIndexMatchStart    = 0
+	regexIndexMatchEnd      = 1
+	regexIndexSubmatchStart = 2
+	regexIndexSubmatchEnd   = 3
+)
 
 var snakeCaseRegex = regexp.MustCompile("^" + snakeCasePattern + "$")
 
@@ -49,6 +56,29 @@ func ReplaceCaseInsensitive(string, toReplace, replaceWith string) string {
 	return regex.ReplaceAllString(string, replaceWith)
 }
 
+// ReplaceAllRegexStringSubmatch replaces, in every match of re in s, the part captured by re's single capturing group
+// with replaceWith; the rest of each match and all unmatched text are kept as they are.
+// A match whose group did not take part (e.g. `a(x)*b` on "ab"), or a regular expression that does not have exactly
+// one capturing group, leaves that match untouched.
+// E.g. with re = a(x*)b, ReplaceAllRegexStringSubmatch(re, "-axxb-ab-axb-x-ax-xb-ba-", "?") returns "-a?b-a?b-a?b-x-ax-xb-ba-"
+func ReplaceAllRegexStringSubmatch(re *regexp.Regexp, s string, replaceWith string) string {
+	result := make([]byte, 0, len(s))
+	lastIndex := 0
+
+	for _, v := range re.FindAllStringSubmatchIndex(s, -1) {
+		// A group that took no part in the match is reported with index -1; slicing s with it would panic.
+		if len(v) != regexIndexSubmatchEnd+1 || v[regexIndexSubmatchStart] < 0 {
+			continue
+		}
+		// Copy the text before the submatch, then the replacement; the tail of the match is copied later.
+		result = append(result, s[lastIndex:v[regexIndexSubmatchStart]]...)
+		result = append(result, replaceWith...)
+		lastIndex = v[regexIndexSubmatchEnd]
+	}
+
+	return string(append(result, s[lastIndex:]...))
+}
+
 // TrimQuotes - trims quotes from a string (ie: "foo" will return foo)
 func TrimQuotes(stringToTrim string) string {
 	if len(stringToTrim) >= 2 {
-- 
GitLab