Skip to content
Snippets Groups Projects
Select Git revision
  • 02882f755c0e8b5f250482d091068b020e153903
  • main default protected
  • trading_hours
  • refactor_trading_hours
  • audit_cleaning_cater_for_non_struct_fields
  • remove-info-logs
  • sl-refactor
  • 18-use-scan-for-param-values
  • 17-order-search-results
  • 4-simplify-framework-2
  • 1-http-error
  • v1.297.0
  • v1.296.0
  • v1.295.0
  • v1.294.0
  • v1.293.0
  • v1.292.0
  • v1.291.0
  • v1.290.0
  • v1.289.0
  • v1.288.0
  • v1.287.0
  • v1.286.0
  • v1.285.0
  • v1.284.0
  • v1.283.0
  • v1.282.0
  • v1.281.0
  • v1.280.0
  • v1.279.0
  • v1.278.0
31 results

sensitive_words.go

Blame
  • sensitive_words.go 2.03 KiB
    package logs
    
    import (
    	"bytes"
    	"regexp"
    	"strings"
    )
    
    //A word may only consist of alphanumerics with delimiters inside,
    //e.g. OTP "12345" or card number "1234-12345678-12345678-1234".
    //The inner delimiters may be spaces, dashes, underscores, slashes and dots,
    //and the word is considered case insensitive.
    
    const (
    	// sensitiveWordPattern is the unanchored regular expression for a single
    	// sensitive word: it starts and ends with a lowercase letter or digit and
    	// may contain dots, backslashes, slashes, spaces, underscores and dashes
    	// in between.
    	// NOTE(review): only lowercase letters are listed, so callers presumably
    	// lowercase candidates before matching - confirm against the call sites.
    	sensitiveWordPattern = `[a-z0-9]([a-z0-9\.\\\/ _-]*[a-z0-9])*`

    	// delimiters lists every byte that counts as a word boundary when
    	// FilterSensitiveWordsMap checks the characters directly before and
    	// after a candidate match.
    	delimiters = "()[]{}!@#$%^&*-=_+;:'\"|\\/?<>,.~` \n\r"
    )

    // sensitiveWordRegex is sensitiveWordPattern anchored at both ends, so it
    // only accepts strings that are a valid sensitive word in their entirety.
    // It is compiled once at package initialisation.
    var sensitiveWordRegex = regexp.MustCompile("^" + sensitiveWordPattern + "$")
    
    func FilterSensitiveWordsMap(s string, wordsMap map[string]bool) (filtered string, changed bool) {
    	if len(wordsMap) == 0 {
    		return s, false
    	}
    
    	changed = false
    	f := []byte(s)
    	for word := range wordsMap {
    		//compiling a regex for each word in each context would be inefficient;
    		//it is much quicker to just look for the word and check that it is delimited as required,
    		//so that short words are not matched as part of longer words, which could reveal the word by inference
    		//e.g. OTP "202" should not match part of a date 2021-01-02 making it ***1-01-02
    		wLen := len(word)
    		offset := 0
    		fLen := len(f)
    		for offset < fLen {
    			index := bytes.Index(f[offset:], []byte(word)) + offset
    			if index < offset {
    				break //word not found
    			}
    
    			//found the word, check delimiters before/after
    			if index > 0 && strings.IndexByte(delimiters, f[index-1]) < 0 {
    				offset = index + 1 //word match without delimiter before
    				continue
    			}
    
    			if index+wLen < fLen && strings.IndexByte(delimiters, f[index+wLen]) < 0 {
    				offset = index + 1 //word match without delimiter after
    				continue
    			}
    
    			//has delimiter after, this is a word match, replace any length match with 3 stars "***"
    			//pad length if required
    			pad := 0
    			for fLen < index+3 {
    				f = append(f, ' ')
    				fLen++
    				pad++
    			}
    			f = append(f[:index+3], f[index+wLen:fLen-pad]...)
    			f[index] = '*'
    			f[index+1] = '*'
    			f[index+2] = '*'
    			fLen = len(f)
    			changed = true
    			offset = index + 3 //for loop skipped index over word, now skip offset over delimiter
    		}
    	}
    	filtered = string(f)
    	return