Explorar o código

Merge pull request #159 from camaya/reduce-entropy-noise

Reduce noise when running entropy checks
Zachary Rice hai 7 anos
pai
achega
af7717412f
Modificáronse 3 ficheiros con 106 adicións e 44 borrados
  1. 25 24
      README.md
  2. 18 0
      gitleaks_test.go
  3. 63 20
      main.go

+ 25 - 24
README.md

@@ -63,30 +63,31 @@ Usage:
   gitleaks [OPTIONS]
 
 Application Options:
-  -r, --repo=          Repo url to audit
-      --github-user=   Github user to audit
-      --github-org=    Github organization to audit
-      --github-url=    GitHub API Base URL, use for GitHub Enterprise. Example: https://github.example.com/api/v3/ (default: https://api.github.com/)
-      --github-pr=     Github PR url to audit. This does not clone the repo. GITHUB_TOKEN must be set
-      --gitlab-user=   GitLab user ID to audit
-      --gitlab-org=    GitLab group ID to audit
-  -c, --commit=        sha of commit to stop at
-      --depth=         maximum commit depth
-      --repo-path=     Path to repo
-      --owner-path=    Path to owner directory (repos discovered)
-      --threads=       Maximum number of threads gitleaks spawns
-      --disk           Clones repo(s) to disk
-      --single-search= single regular expression to search for
-      --config=        path to gitleaks config
-      --ssh-key=       path to ssh key
-      --exclude-forks  exclude forks for organization/user audits
-  -e, --entropy=       Include entropy checks during audit. Entropy scale: 0.0(no entropy) - 8.0(max entropy)
-  -l, --log=           log level
-  -v, --verbose        Show verbose output from gitleaks audit
-      --report=        path to write report file
-      --redact         redact secrets from log messages and report
-      --version        version number
-      --sample-config  prints a sample config file
+  -r, --repo=           Repo url to audit
+      --github-user=    Github user to audit
+      --github-org=     Github organization to audit
+      --github-url=     GitHub API Base URL, use for GitHub Enterprise. Example: https://github.example.com/api/v3/ (default: https://api.github.com/)
+      --github-pr=      Github PR url to audit. This does not clone the repo. GITHUB_TOKEN must be set
+      --gitlab-user=    GitLab user ID to audit
+      --gitlab-org=     GitLab group ID to audit
+  -c, --commit=         sha of commit to stop at
+      --depth=          maximum commit depth
+      --repo-path=      Path to repo
+      --owner-path=     Path to owner directory (repos discovered)
+      --threads=        Maximum number of threads gitleaks spawns
+      --disk            Clones repo(s) to disk
+      --single-search=  single regular expression to search for
+      --config=         path to gitleaks config
+      --ssh-key=        path to ssh key
+      --exclude-forks   exclude forks for organization/user audits
+  -e, --entropy=        Include entropy checks during audit. Entropy scale: 0.0(no entropy) - 8.0(max entropy)
+      --noise-reduction Reduce the number of finds when entropy checks are enabled
+  -l, --log=            log level
+  -v, --verbose         Show verbose output from gitleaks audit
+      --report=         path to write report file
+      --redact          redact secrets from log messages and report
+      --version         version number
+      --sample-config   prints a sample config file
 
 Help Options:
   -h, --help           Show this help message

+ 18 - 0
gitleaks_test.go

@@ -592,6 +592,15 @@ func TestAuditRepo(t *testing.T) {
 			},
 			numLeaks: 6,
 		},
+		{
+			repo:        leaksRepo,
+			description: "leaks present with entropy",
+			testOpts: Options{
+				Entropy:        4.7,
+				NoiseReduction: true,
+			},
+			numLeaks: 2,
+		},
 		{
 			repo:        leaksRepo,
 			description: "Audit until specific commit",
@@ -614,6 +623,15 @@ func TestAuditRepo(t *testing.T) {
 			numLeaks:    298,
 			configPath:  path.Join(configsDir, "entropy"),
 		},
+		{
+			repo: leaksRepo,
+			testOpts: Options{
+				NoiseReduction: true,
+			},
+			description: "toml entropy range",
+			numLeaks:    58,
+			configPath:  path.Join(configsDir, "entropy"),
+		},
 		{
 			repo:           leaksRepo,
 			description:    "toml bad entropy range",

+ 63 - 20
main.go

@@ -77,13 +77,14 @@ type Options struct {
 	OwnerPath string `long:"owner-path" description:"Path to owner directory (repos discovered)"`
 
 	// Process options
-	Threads      int     `long:"threads" description:"Maximum number of threads gitleaks spawns"`
-	Disk         bool    `long:"disk" description:"Clones repo(s) to disk"`
-	SingleSearch string  `long:"single-search" description:"single regular expression to search for"`
-	ConfigPath   string  `long:"config" description:"path to gitleaks config"`
-	SSHKey       string  `long:"ssh-key" description:"path to ssh key"`
-	ExcludeForks bool    `long:"exclude-forks" description:"exclude forks for organization/user audits"`
-	Entropy      float64 `long:"entropy" short:"e" description:"Include entropy checks during audit. Entropy scale: 0.0(no entropy) - 8.0(max entropy)"`
+	Threads        int     `long:"threads" description:"Maximum number of threads gitleaks spawns"`
+	Disk           bool    `long:"disk" description:"Clones repo(s) to disk"`
+	SingleSearch   string  `long:"single-search" description:"single regular expression to search for"`
+	ConfigPath     string  `long:"config" description:"path to gitleaks config"`
+	SSHKey         string  `long:"ssh-key" description:"path to ssh key"`
+	ExcludeForks   bool    `long:"exclude-forks" description:"exclude forks for organization/user audits"`
+	Entropy        float64 `long:"entropy" short:"e" description:"Include entropy checks during audit. Entropy scale: 0.0(no entropy) - 8.0(max entropy)"`
+	NoiseReduction bool    `long:"noise-reduction" description:"Reduce the number of finds when entropy checks are enabled"`
 	// TODO: IncludeMessages  string `long:"messages" description:"include commit messages in audit"`
 
 	// Output options
@@ -101,6 +102,9 @@ type Config struct {
 		Description string
 		Regex       string
 	}
+	Entropy struct {
+		LineRegexes []string
+	}
 	Whitelist struct {
 		Files   []string
 		Regexes []string
@@ -169,6 +173,21 @@ regex = '''(?i)github(.{0,4})?['\"][0-9a-zA-Z]{35,40}['\"]'''
 description = "Slack"
 regex = '''xox[baprs]-([0-9a-zA-Z]{10,48})?'''
 
+[entropy]
+lineregexes = [
+	"api",
+	"key",
+	"signature",
+	"secret",
+	"password",
+	"pass",
+	"pwd",
+	"token",
+	"curl",
+	"wget",
+	"https?",
+]
+
 [whitelist]
 files = [
   "(.*?)(jpg|gif|doc|pdf|bin)$"
@@ -196,6 +215,7 @@ var (
 	whiteListCommits  map[string]bool
 	whiteListRepos    []*regexp.Regexp
 	entropyRanges     []entropyRange
+	entropyRegexes    []*regexp.Regexp
 	fileDiffRegex     *regexp.Regexp
 	sshAuth           *ssh.PublicKeys
 	dir               string
@@ -630,23 +650,12 @@ func inspect(diff gitDiff) []Leak {
 		}
 
 		if opts.Entropy > 0 || len(entropyRanges) != 0 {
-			entropyLeak := false
 			words := strings.Fields(line)
 			for _, word := range words {
 				entropy := getShannonEntropy(word)
-				if entropy >= opts.Entropy && len(entropyRanges) == 0 {
-					entropyLeak = true
-				}
-				if len(entropyRanges) != 0 {
-					for _, eR := range entropyRanges {
-						if entropy > eR.v1 && entropy < eR.v2 {
-							entropyLeak = true
-						}
-					}
-				}
-				if entropyLeak {
+
+				if entropyIsHighEnough(entropy) && highEntropyLineIsALeak(line) {
 					leaks = addLeak(leaks, line, word, fmt.Sprintf("Entropy: %.2f", entropy), diff)
-					entropyLeak = false
 				}
 			}
 		}
@@ -700,6 +709,36 @@ func getShannonEntropy(data string) (entropy float64) {
 	return entropy
 }
 
+func entropyIsHighEnough(entropy float64) bool { // reports whether a word's entropy value should be flagged
+	if entropy >= opts.Entropy && len(entropyRanges) == 0 { // no ranges loaded: inclusive check against opts.Entropy (--entropy)
+		return true
+	}
+
+	if len(entropyRanges) != 0 { // ranges loaded: opts.Entropy is not consulted on this path
+		for _, eR := range entropyRanges {
+			if entropy > eR.v1 && entropy < eR.v2 { // both bounds exclusive; matching any single range is enough
+				return true
+			}
+		}
+	}
+
+	return false
+}
+
+func highEntropyLineIsALeak(line string) bool { // decides whether a line holding a high-entropy word is reported as a leak
+	if !opts.NoiseReduction { // --noise-reduction off: every such line is reported
+		return true
+	}
+
+	for _, re := range entropyRegexes {
+		if re.FindString(line) != "" { // NOTE(review): a pattern whose leftmost match in line is empty yields "" here and is treated as no match; re.MatchString(line) would not have that gap
+			return true
+		}
+	}
+
+	return false // noise reduction on and no entropyRegexes pattern produced a non-empty match
+}
+
 // discoverRepos walks all the children of `path`. If a child directory
 // contain a .git file then that repo will be added to the list of repos returned
 func discoverRepos(ownerPath string) ([]*RepoDescriptor, error) {
@@ -842,6 +881,10 @@ func loadToml() error {
 		}
 	}
 
+	for _, regex := range config.Entropy.LineRegexes {
+		entropyRegexes = append(entropyRegexes, regexp.MustCompile(regex))
+	}
+
 	if singleSearchRegex != nil {
 		regexes["singleSearch"] = singleSearchRegex
 	} else {