package logs

import (
	"bytes"
	"regexp"
	"sort"
	"strings"
)

// sensitiveWordPattern describes a valid sensitive word: alpha-numerics with
// optional delimiters inside, e.g. OTP "12345" or card number
// "1234-12345678-12345678-1234". The inner delimiters may be spaces, dashes,
// underscores, slashes, backslashes and dots.
//
// Words are considered case insensitive; the pattern itself only lists
// lower-case letters.
// NOTE(review): presumably callers lower-case words (and text) before use - confirm.
const sensitiveWordPattern = `[a-z0-9]([a-z0-9\.\\\/ _-]*[a-z0-9])*`

// sensitiveWordRegex matches a string that consists of exactly one sensitive word.
var sensitiveWordRegex = regexp.MustCompile("^" + sensitiveWordPattern + "$")

// delimiters lists the bytes that may surround a word for an occurrence to count
// as a whole-word match. The start and the end of the text count as delimiters too.
const delimiters = "()[]{}!@#$%^&*-=_+;:'\"|\\/?<>,.~` \n\r"

// FilterSensitiveWordsMap replaces every whole-word occurrence of each key of
// wordsMap inside s with "***", regardless of the length of the word.
//
// An occurrence only counts when it is delimited on both sides (by a byte from
// delimiters or by the start/end of s). This prevents short words from matching
// inside longer tokens, which could expose the word by assumption: e.g. OTP "202"
// must not match inside the date 2021-01-02 and turn it into ***1-01-02.
//
// Only the keys of wordsMap are used; the boolean values are not consulted.
// Empty keys are ignored. Words are applied longest first (ties broken
// lexicographically) so the result is deterministic and a short word can never
// break up, and thereby partially leak, a longer word that contains it.
//
// It returns the filtered text and whether at least one replacement was made.
func FilterSensitiveWordsMap(s string, wordsMap map[string]bool) (filtered string, changed bool) {
	if len(wordsMap) == 0 {
		return s, false
	}

	const mask = "***"

	// Collect the usable words. An empty word matches at every offset and would
	// make the scan below insert masks forever, so it is dropped here.
	words := make([]string, 0, len(wordsMap))
	for word := range wordsMap {
		if word != "" {
			words = append(words, word)
		}
	}

	// Map iteration order is random; fix the order so overlapping words always
	// produce the same, maximally redacted, output.
	sort.Slice(words, func(i, j int) bool {
		if len(words[i]) != len(words[j]) {
			return len(words[i]) > len(words[j])
		}
		return words[i] < words[j]
	})

	// Compiling a regex for each word in each context would be inefficient; it is
	// much quicker to look for the word and check the bytes around the match.
	f := []byte(s)
	for _, word := range words {
		needle := []byte(word)
		wLen := len(needle)
		offset := 0
		for offset < len(f) {
			pos := bytes.Index(f[offset:], needle)
			if pos < 0 {
				break // word not found in the rest of the text
			}
			index := offset + pos
			end := index + wLen

			// The match must be preceded by a delimiter (or start of text)...
			if index > 0 && strings.IndexByte(delimiters, f[index-1]) < 0 {
				offset = index + 1
				continue
			}
			// ...and followed by a delimiter (or end of text).
			if end < len(f) && strings.IndexByte(delimiters, f[end]) < 0 {
				offset = index + 1
				continue
			}

			// Whole-word match: replace f[index:end] with the mask in place.
			// Grow the buffer first when the word is shorter than the mask.
			oldLen := len(f)
			newLen := oldLen - wLen + len(mask)
			if newLen > oldLen {
				f = append(f, make([]byte, newLen-oldLen)...)
			}
			// copy handles the overlapping move of the tail in either direction.
			copy(f[index+len(mask):], f[end:oldLen])
			copy(f[index:], mask)
			f = f[:newLen]

			changed = true
			offset = index + len(mask) // continue scanning right after the mask
		}
	}

	if !changed {
		return s, false
	}
	return string(f), true
}