Просмотр исходного кода

Add option to check string entropy

Strings with high entropy have a good probability of being a secret.
Cristhian Amaya 7 лет назад
Родитель
Коммит
1cb15242fd
3 измененных файлов с 87 добавлено и 26 удалено
  1. 3 0
      CHANGELOG.md
  2. 16 0
      gitleaks_test.go
  3. 68 26
      main.go

+ 3 - 0
CHANGELOG.md

@@ -1,6 +1,9 @@
 CHANGELOG
 =========
 
+1.10.0
+-----
+- Add entropy option
 1.9.0
 -----
 - exclude fork option

+ 16 - 0
gitleaks_test.go

@@ -560,6 +560,14 @@ func TestAuditRepo(t *testing.T) {
 			numLeaks:    0,
 			configPath:  path.Join(configsDir, "repo"),
 		},
+		{
+			repo:        leaksRepo,
+			description: "leaks present with entropy",
+			testOpts: Options{
+				Entropy: 4.7,
+			},
+			numLeaks: 7,
+		},
 	}
 
 	whiteListCommits = make(map[string]bool)
@@ -686,6 +694,14 @@ func TestOptionGuard(t *testing.T) {
 			description:    "single search regex gaurd",
 			expectedErrMsg: "",
 		},
+		{
+			testOpts: Options{
+				GithubOrg: "fakeOrg",
+				Entropy:   9,
+			},
+			description:    "Invalid entropy level guard",
+			expectedErrMsg: "The maximum level of entropy is 8",
+		},
 	}
 	g := goblin.Goblin(t)
 	for _, test := range tests {

+ 68 - 26
main.go

@@ -7,6 +7,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io/ioutil"
+	"math"
 	"net"
 	"net/http"
 	"net/url"
@@ -81,13 +82,14 @@ type Options struct {
 	OwnerPath string `long:"owner-path" description:"Path to owner directory (repos discovered)"`
 
 	// Process options
-	MaxGoRoutines int    `long:"max-go" description:"Maximum number of concurrent go-routines gitleaks spawns"`
-	Disk          bool   `long:"disk" description:"Clones repo(s) to disk"`
-	AuditAllRefs  bool   `long:"all-refs" description:"run audit on all refs"`
-	SingleSearch  string `long:"single-search" description:"single regular expression to search for"`
-	ConfigPath    string `long:"config" description:"path to gitleaks config"`
-	SSHKey        string `long:"ssh-key" description:"path to ssh key"`
-	ExcludeForks  bool   `long:"exclude-forks" description:"exclude forks for organization/user audits"`
+	MaxGoRoutines int     `long:"max-go" description:"Maximum number of concurrent go-routines gitleaks spawns"`
+	Disk          bool    `long:"disk" description:"Clones repo(s) to disk"`
+	AuditAllRefs  bool    `long:"all-refs" description:"run audit on all refs"`
+	SingleSearch  string  `long:"single-search" description:"single regular expression to search for"`
+	ConfigPath    string  `long:"config" description:"path to gitleaks config"`
+	SSHKey        string  `long:"ssh-key" description:"path to ssh key"`
+	ExcludeForks  bool    `long:"exclude-forks" description:"exclude forks for organization/user audits"`
+	Entropy       float64 `long:"entropy" short:"e" description:"Report a finding when a string has at least the entropy level you defined"`
 	// TODO: IncludeMessages  string `long:"messages" description:"include commit messages in audit"`
 
 	// Output options
@@ -124,7 +126,7 @@ type gitDiff struct {
 }
 
 const defaultGithubURL = "https://api.github.com/"
-const version = "1.9.0"
+const version = "1.10.0"
 const errExit = 2
 const leakExit = 1
 const defaultConfig = `
@@ -576,30 +578,66 @@ func inspect(diff gitDiff) []Leak {
 				break
 			}
 
-			leak := Leak{
-				Line:     line,
-				Commit:   diff.commit.Hash.String(),
-				Offender: match,
-				Type:     leakType,
-				Message:  diff.commit.Message,
-				Author:   diff.commit.Author.String(),
-				File:     diff.filePath,
-				Branch:   diff.branchName,
-				Repo:     diff.repoName,
-			}
-			if opts.Redact {
-				leak.Offender = "REDACTED"
-				leak.Line = "REDACTED"
-			}
-			if opts.Verbose {
-				leak.log()
+			leaks = addLeak(leaks, line, match, leakType, diff)
+		}
+
+		if opts.Entropy > 0 {
+			words := strings.Fields(line)
+			for _, word := range words {
+				if getShannonEntropy(word) >= opts.Entropy {
+					leaks = addLeak(leaks, line, word, "High Entropy", diff)
+				}
 			}
-			leaks = append(leaks, leak)
 		}
 	}
 	return leaks
 }
 
// getShannonEntropy returns the Shannon entropy of data, in bits per
// character: the sum over every distinct character c of -p(c) * log2(p(c)),
// where p(c) is the fraction of characters in data equal to c.
//
// An empty string has an entropy of 0. A string made of a single repeated
// character also scores 0; the score grows as characters become more varied.
func getShannonEntropy(data string) (entropy float64) {
	if data == "" {
		return 0
	}

	// Ranging over a string yields runes, so the total must be counted in
	// runes as well. len(data) is a byte count and would make the
	// frequencies of any multi-byte UTF-8 input sum to less than 1.
	charCounts := make(map[rune]int)
	total := 0
	for _, char := range data {
		charCounts[char]++
		total++
	}

	invLength := 1.0 / float64(total)
	for _, count := range charCounts {
		freq := float64(count) * invLength
		entropy -= freq * math.Log2(freq)
	}

	return entropy
}
+
+func addLeak(leaks []Leak, line string, offender string, leakType string, diff gitDiff) []Leak {
+	leak := Leak{
+		Line:     line,
+		Commit:   diff.commit.Hash.String(),
+		Offender: offender,
+		Type:     leakType,
+		Message:  diff.commit.Message,
+		Author:   diff.commit.Author.String(),
+		File:     diff.filePath,
+		Branch:   diff.branchName,
+		Repo:     diff.repoName,
+	}
+
+	if opts.Redact {
+		leak.Offender = "REDACTED"
+		leak.Line = "REDACTED"
+	}
+
+	if opts.Verbose {
+		leak.log()
+	}
+
+	leaks = append(leaks, leak)
+	return leaks
+}
+
 // auditGithubRepos kicks off audits if --github-user or --github-org options are set.
 // First, we gather all the github repositories from the github api (this doesn't actually clone the repo).
 // After all the repos have been pulled from github's api we proceed to audit the repos by calling auditGithubRepo.
@@ -866,6 +904,10 @@ func optsGuard() error {
 		}
 	}
 
+	if opts.Entropy > 8 {
+		return fmt.Errorf("The maximum level of entropy is 8")
+	}
+
 	return nil
 }