4
0
Эх сурвалжийг харах

Reduce noise when running entropy checks

When a word on a line has high entropy, we check whether the line
matches any of the regexes configured in the `lineregexes` element of the
`entropy` section. If so, it's marked as a leak.

This has reduced the number of leaks found with entropy enabled by up to
80%.
Cristhian Amaya 7 жил өмнө
parent
commit
ea49078ffd
2 өөрчлөгдсөн 53 нэмэгдсэн , 15 устгасан
  1. 2 2
      gitleaks_test.go
  2. 51 13
      main.go

+ 2 - 2
gitleaks_test.go

@@ -590,7 +590,7 @@ func TestAuditRepo(t *testing.T) {
 			testOpts: Options{
 			testOpts: Options{
 				Entropy: 4.7,
 				Entropy: 4.7,
 			},
 			},
-			numLeaks: 6,
+			numLeaks: 2,
 		},
 		},
 		{
 		{
 			repo:        leaksRepo,
 			repo:        leaksRepo,
@@ -611,7 +611,7 @@ func TestAuditRepo(t *testing.T) {
 		{
 		{
 			repo:        leaksRepo,
 			repo:        leaksRepo,
 			description: "toml entropy range",
 			description: "toml entropy range",
-			numLeaks:    298,
+			numLeaks:    58,
 			configPath:  path.Join(configsDir, "entropy"),
 			configPath:  path.Join(configsDir, "entropy"),
 		},
 		},
 		{
 		{

+ 51 - 13
main.go

@@ -101,6 +101,9 @@ type Config struct {
 		Description string
 		Description string
 		Regex       string
 		Regex       string
 	}
 	}
+	Entropy struct {
+		LineRegexes []string
+	}
 	Whitelist struct {
 	Whitelist struct {
 		Files   []string
 		Files   []string
 		Regexes []string
 		Regexes []string
@@ -169,6 +172,21 @@ regex = '''(?i)github(.{0,4})?['\"][0-9a-zA-Z]{35,40}['\"]'''
 description = "Slack"
 description = "Slack"
 regex = '''xox[baprs]-([0-9a-zA-Z]{10,48})?'''
 regex = '''xox[baprs]-([0-9a-zA-Z]{10,48})?'''
 
 
+[entropy]
+lineregexes = [
+	"api",
+	"key",
+	"signature",
+	"secret",
+	"password",
+	"pass",
+	"pwd",
+	"token",
+	"curl",
+	"wget",
+	"https?",
+]
+
 [whitelist]
 [whitelist]
 files = [
 files = [
   "(.*?)(jpg|gif|doc|pdf|bin)$"
   "(.*?)(jpg|gif|doc|pdf|bin)$"
@@ -196,6 +214,7 @@ var (
 	whiteListCommits  map[string]bool
 	whiteListCommits  map[string]bool
 	whiteListRepos    []*regexp.Regexp
 	whiteListRepos    []*regexp.Regexp
 	entropyRanges     []entropyRange
 	entropyRanges     []entropyRange
+	entropyRegexes    []*regexp.Regexp
 	fileDiffRegex     *regexp.Regexp
 	fileDiffRegex     *regexp.Regexp
 	sshAuth           *ssh.PublicKeys
 	sshAuth           *ssh.PublicKeys
 	dir               string
 	dir               string
@@ -630,23 +649,12 @@ func inspect(diff gitDiff) []Leak {
 		}
 		}
 
 
 		if opts.Entropy > 0 || len(entropyRanges) != 0 {
 		if opts.Entropy > 0 || len(entropyRanges) != 0 {
-			entropyLeak := false
 			words := strings.Fields(line)
 			words := strings.Fields(line)
 			for _, word := range words {
 			for _, word := range words {
 				entropy := getShannonEntropy(word)
 				entropy := getShannonEntropy(word)
-				if entropy >= opts.Entropy && len(entropyRanges) == 0 {
-					entropyLeak = true
-				}
-				if len(entropyRanges) != 0 {
-					for _, eR := range entropyRanges {
-						if entropy > eR.v1 && entropy < eR.v2 {
-							entropyLeak = true
-						}
-					}
-				}
-				if entropyLeak {
+
+				if entropyIsHighEnough(entropy) && highEntropyLineIsALeak(line) {
 					leaks = addLeak(leaks, line, word, fmt.Sprintf("Entropy: %.2f", entropy), diff)
 					leaks = addLeak(leaks, line, word, fmt.Sprintf("Entropy: %.2f", entropy), diff)
-					entropyLeak = false
 				}
 				}
 			}
 			}
 		}
 		}
@@ -700,6 +708,32 @@ func getShannonEntropy(data string) (entropy float64) {
 	return entropy
 	return entropy
 }
 }
 
 
+func entropyIsHighEnough(entropy float64) bool {
+	if entropy >= opts.Entropy && len(entropyRanges) == 0 {
+		return true
+	}
+
+	if len(entropyRanges) != 0 {
+		for _, eR := range entropyRanges {
+			if entropy > eR.v1 && entropy < eR.v2 {
+				return true
+			}
+		}
+	}
+
+	return false
+}
+
+func highEntropyLineIsALeak(line string) bool {
+	for _, re := range entropyRegexes {
+		if re.FindString(line) != "" {
+			return true
+		}
+	}
+
+	return false
+}
+
 // discoverRepos walks all the children of `path`. If a child directory
 // discoverRepos walks all the children of `path`. If a child directory
 // contain a .git file then that repo will be added to the list of repos returned
 // contain a .git file then that repo will be added to the list of repos returned
 func discoverRepos(ownerPath string) ([]*RepoDescriptor, error) {
 func discoverRepos(ownerPath string) ([]*RepoDescriptor, error) {
@@ -842,6 +876,10 @@ func loadToml() error {
 		}
 		}
 	}
 	}
 
 
+	for _, regex := range config.Entropy.LineRegexes {
+		entropyRegexes = append(entropyRegexes, regexp.MustCompile(regex))
+	}
+
 	if singleSearchRegex != nil {
 	if singleSearchRegex != nil {
 		regexes["singleSearch"] = singleSearchRegex
 		regexes["singleSearch"] = singleSearchRegex
 	} else {
 	} else {