ソースを参照

Merge pull request #140 from zricethezav/feature/default-all

Feature/default all
Zachary Rice 7年前
コミット
bba926693e
4 ファイル変更、49 行追加、296 行削除
  1. 8 0
      CHANGELOG.md
  2. 0 2
      README.md
  3. 1 213
      gitleaks_test.go
  4. 40 81
      main.go

+ 8 - 0
CHANGELOG.md

@@ -1,6 +1,14 @@
 CHANGELOG
 =========
 
+1.19.0
+----
+- removed `--all-refs`. By default gitleaks now scans all branches, remote and local.
+- added commit memoizer to prevent duplicate commit audits
+- removed branch whitelist
+- removed branch from report as a commit is an object independent of branch
+- Better regexes for facebook, github, twitter (no more unbounded wildcards)
+
 1.18.0
 ----
 - fixing slack token

+ 0 - 2
README.md

@@ -68,14 +68,12 @@ Application Options:
       --github-org=    Github organization to audit
       --github-url=    GitHub API Base URL, use for GitHub Enterprise. Example: https://github.example.com/api/v3/ (default: https://api.github.com/)
       --github-pr=     Github PR url to audit. This does not clone the repo. GITHUB_TOKEN must be set
-  -b, --branch=        branch name to audit (defaults to HEAD)
   -c, --commit=        sha of commit to stop at
       --depth=         maximum commit depth
       --repo-path=     Path to repo
       --owner-path=    Path to owner directory (repos discovered)
       --threads=       Maximum number of threads gitleaks spawns
       --disk           Clones repo(s) to disk
-      --all-refs       run audit on all refs
       --single-search= single regular expression to search for
       --config=        path to gitleaks config
       --ssh-key=       path to ssh key

+ 1 - 213
gitleaks_test.go

@@ -35,16 +35,6 @@ files = [
   ".go",
 ]
 `
-const testWhitelistBranch = `
-[[regexes]]
-description = "AWS"
-regex = '''AKIA[0-9A-Z]{16}'''
-
-[whitelist]
-branches = [
-  "origin/master",
-]
-`
 
 const testWhitelistRegex = `
 [[regexes]]
@@ -88,35 +78,6 @@ entropy = [
 ]
 `
 
-var benchmarkRepo *RepoDescriptor
-var benchmarkLeaksRepo *RepoDescriptor
-
-func getBenchmarkLeaksRepo() *RepoDescriptor {
-	if benchmarkLeaksRepo != nil {
-		return benchmarkLeaksRepo
-	}
-	leaksR, _ := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
-		URL: "https://github.com/gitleakstest/gronit.git",
-	})
-	benchmarkLeaksRepo = &RepoDescriptor{
-		repository: leaksR,
-	}
-	return benchmarkLeaksRepo
-}
-
-func getBenchmarkRepo() *RepoDescriptor {
-	if benchmarkRepo != nil {
-		return benchmarkRepo
-	}
-	bmRepo, _ := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
-		URL: "https://github.com/apple/swift-package-manager.git",
-	})
-	benchmarkRepo = &RepoDescriptor{
-		repository: bmRepo,
-	}
-	return benchmarkRepo
-}
-
 func TestGetRepo(t *testing.T) {
 	var err error
 	dir, err = ioutil.TempDir("", "gitleaksTestRepo")
@@ -442,7 +403,6 @@ func TestWriteReport(t *testing.T) {
 func testTomlLoader() string {
 	tmpDir, _ := ioutil.TempDir("", "whiteListConfigs")
 	ioutil.WriteFile(path.Join(tmpDir, "regex"), []byte(testWhitelistRegex), 0644)
-	ioutil.WriteFile(path.Join(tmpDir, "branch"), []byte(testWhitelistBranch), 0644)
 	ioutil.WriteFile(path.Join(tmpDir, "commit"), []byte(testWhitelistCommit), 0644)
 	ioutil.WriteFile(path.Join(tmpDir, "file"), []byte(testWhitelistFile), 0644)
 	ioutil.WriteFile(path.Join(tmpDir, "repo"), []byte(testWhitelistRepo), 0644)
@@ -509,41 +469,6 @@ func TestAuditRepo(t *testing.T) {
 				Threads: 4,
 			},
 		},
-		{
-			repo:        leaksRepo,
-			description: "audit specific bad branch",
-			numLeaks:    2,
-			testOpts: Options{
-				Branch: "master",
-			},
-		},
-		{
-			repo:        leaksRepo,
-			description: "audit specific good branch",
-			numLeaks:    0,
-			testOpts: Options{
-				Branch: "dev",
-			},
-		},
-		{
-			repo:        leaksRepo,
-			description: "audit all branch",
-			numLeaks:    6,
-			testOpts: Options{
-				AuditAllRefs: true,
-			},
-		},
-		{
-			repo:        leaksRepo,
-			description: "audit all branch whitelist 1",
-			numLeaks:    4,
-			testOpts: Options{
-				AuditAllRefs: true,
-			},
-			whiteListBranches: []string{
-				"origin/master",
-			},
-		},
 		{
 			repo:        leaksRepo,
 			description: "two leaks present whitelist AWS.. no leaks",
@@ -592,15 +517,6 @@ func TestAuditRepo(t *testing.T) {
 			configPath:  path.Join(configsDir, "regex"),
 			numLeaks:    0,
 		},
-		{
-			repo:        leaksRepo,
-			description: "toml whitelist branch",
-			configPath:  path.Join(configsDir, "branch"),
-			testOpts: Options{
-				AuditAllRefs: true,
-			},
-			numLeaks: 4,
-		},
 		{
 			repo:        leaksRepo,
 			description: "toml whitelist file",
@@ -662,7 +578,7 @@ func TestAuditRepo(t *testing.T) {
 		{
 			repo:        leaksRepo,
 			description: "toml entropy range",
-			numLeaks:    283,
+			numLeaks:    284,
 			configPath:  path.Join(configsDir, "entropy"),
 		},
 		{
@@ -697,11 +613,6 @@ func TestAuditRepo(t *testing.T) {
 				} else {
 					whiteListCommits = nil
 				}
-				if test.whiteListBranches != nil {
-					whiteListBranches = test.whiteListBranches
-				} else {
-					whiteListBranches = nil
-				}
 				if test.whiteListRegexes != nil {
 					whiteListRegexes = test.whiteListRegexes
 				} else {
@@ -897,126 +808,3 @@ func TestLoadToml(t *testing.T) {
 		})
 	}
 }
-
-func BenchmarkAuditRepo1Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 1
-	benchmarkRepo = getBenchmarkRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}
-
-func BenchmarkAuditRepo2Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 2
-	benchmarkRepo = getBenchmarkRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}
-
-func BenchmarkAuditRepo4Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 4
-	benchmarkRepo = getBenchmarkRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}
-
-func BenchmarkAuditRepo8Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 8
-	benchmarkRepo = getBenchmarkRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}
-
-func BenchmarkAuditRepo10Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 10
-	benchmarkRepo = getBenchmarkRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}
-
-func BenchmarkAuditRepo100Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 100
-	benchmarkRepo = getBenchmarkRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}
-
-func BenchmarkAuditRepo1000Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 1000
-	benchmarkRepo = getBenchmarkRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}
-func BenchmarkAuditLeakRepo1Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 1
-	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}
-
-func BenchmarkAuditLeakRepo2Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 2
-	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}
-
-func BenchmarkAuditLeakRepo4Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 4
-	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}
-
-func BenchmarkAuditLeakRepo8Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 8
-	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}
-
-func BenchmarkAuditLeakRepo10Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 10
-	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}
-func BenchmarkAuditLeakRepo100Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 100
-	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}
-func BenchmarkAuditLeakRepo1000Proc(b *testing.B) {
-	loadToml()
-	opts.Threads = 1000
-	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
-	for n := 0; n < b.N; n++ {
-		auditGitRepo(benchmarkRepo)
-	}
-}

+ 40 - 81
main.go

@@ -73,7 +73,6 @@ type Options struct {
 		GitLabOrg  string `long:"gitlab-org" description:"Organization url to audit"`
 	*/
 
-	Branch string `short:"b" long:"branch" description:"branch name to audit (defaults to HEAD)"`
 	Commit string `short:"c" long:"commit" description:"sha of commit to stop at"`
 	Depth  int    `long:"depth" description:"maximum commit depth"`
 
@@ -84,7 +83,6 @@ type Options struct {
 	// Process options
 	Threads      int     `long:"threads" description:"Maximum number of threads gitleaks spawns"`
 	Disk         bool    `long:"disk" description:"Clones repo(s) to disk"`
-	AuditAllRefs bool    `long:"all-refs" description:"run audit on all refs"`
 	SingleSearch string  `long:"single-search" description:"single regular expression to search for"`
 	ConfigPath   string  `long:"config" description:"path to gitleaks config"`
 	SSHKey       string  `long:"ssh-key" description:"path to ssh key"`
@@ -108,11 +106,10 @@ type Config struct {
 		Regex       string
 	}
 	Whitelist struct {
-		Files    []string
-		Regexes  []string
-		Commits  []string
-		Branches []string
-		Repos    []string
+		Files   []string
+		Regexes []string
+		Commits []string
+		Repos   []string
 	}
 	Misc struct {
 		Entropy []string
@@ -123,7 +120,6 @@ type gitDiff struct {
 	content      string
 	commit       *object.Commit
 	filePath     string
-	branchName   string
 	repoName     string
 	githubCommit *github.RepositoryCommit
 	sha          string
@@ -138,7 +134,7 @@ type entropyRange struct {
 }
 
 const defaultGithubURL = "https://api.github.com/"
-const version = "1.18.0"
+const version = "1.19.0"
 const errExit = 2
 const leakExit = 1
 const defaultConfig = `
@@ -166,39 +162,22 @@ description = "PGP"
 regex = '''-----BEGIN PGP PRIVATE KEY BLOCK-----'''
 [[regexes]]
 description = "Facebook"
-regex = '''(?i)facebook.*['\"][0-9a-f]{32}['\"]'''
+regex = '''(?i)facebook(.{0,4})?['\"][0-9a-f]{32}['\"]'''
 [[regexes]]
 description = "Twitter"
-regex = '''(?i)twitter.*['\"][0-9a-zA-Z]{35,44}['\"]'''
+regex = '''(?i)twitter(.{0,4})?['\"][0-9a-zA-Z]{35,44}['\"]'''
 [[regexes]]
 description = "Github"
-regex = '''(?i)github.*['\"][0-9a-zA-Z]{35,40}['\"]'''
+regex = '''(?i)github(.{0,4})?['\"][0-9a-zA-Z]{35,40}['\"]'''
 [[regexes]]
 description = "Slack"
-regex = '''xox[baprs]-.*'''
-[[regexes]]
-description = "Telegram"
-regex = '''\d{5,}:A[a-zA-Z0-9_\-]{34,34}'''
+regex = '''xox[baprs]-([0-9a-zA-Z]{10,48})?'''
 
 [whitelist]
-regexes = [
-  '''(?i)github.*ref.*['\"][0-9a-fA-F]{35,40}['\"]''',
-  '''(?i)shasum.*['\"][0-9a-fA-F]{40}['\"]''',
-  '''(?i)githead.*['\"][0-9a-fA-F]{40}['\"]''',
-  '''(?i)email_hash.*['\"][0-9a-fA-F]{40}['\"]''',
-  '''(?i)email_hash.*['\"][0-9a-fA-F]{32}['\"]''',
-  '''(?i)rev.*:.*['\"][0-9a-fA-F]{40}['\"]''',
-]
-files = [
-  "(.*?)(jpg|gif|doc|pdf|bin)$"
-]
 #commits = [
 #  "BADHA5H1",
 #  "BADHA5H2",
 #]
-#branches = [
-#	"dev/goodrepo"
-#]
 #repos = [
 #	"mygoodrepo"
 #]
@@ -216,7 +195,6 @@ var (
 	whiteListRegexes  []*regexp.Regexp
 	whiteListFiles    []*regexp.Regexp
 	whiteListCommits  map[string]bool
-	whiteListBranches []string
 	whiteListRepos    []*regexp.Regexp
 	entropyRanges     []entropyRange
 	fileDiffRegex     *regexp.Regexp
@@ -224,6 +202,8 @@ var (
 	dir               string
 	threads           int
 	totalCommits      int64
+	commitMap         = make(map[string]bool)
+	cMutex            = &sync.Mutex{}
 )
 
 func init() {
@@ -429,51 +409,24 @@ func auditGitRepo(repo *RepoDescriptor) ([]Leak, error) {
 			return leaks, fmt.Errorf("skipping %s, whitelisted", repo.name)
 		}
 	}
-	ref, err := repo.repository.Head()
+
+	// clear commit cache
+	commitMap = make(map[string]bool)
+
+	refs, err := repo.repository.Storer.IterReferences()
 	if err != nil {
 		return leaks, err
 	}
-
-	if opts.AuditAllRefs {
-		skipBranch := false
-		refs, err := repo.repository.Storer.IterReferences()
-		if err != nil {
-			return leaks, err
-		}
-		err = refs.ForEach(func(ref *plumbing.Reference) error {
-			for _, b := range whiteListBranches {
-				if strings.HasSuffix(string(ref.Name()), b) {
-					skipBranch = true
-				}
-			}
-			if skipBranch {
-				skipBranch = false
-				return nil
-			}
-			branchLeaks := auditGitReference(repo, ref)
-			for _, leak := range branchLeaks {
-				leaks = append(leaks, leak)
-			}
+	err = refs.ForEach(func(ref *plumbing.Reference) error {
+		if ref.Name().IsTag() {
 			return nil
-		})
-	} else {
-		if opts.Branch != "" {
-			foundBranch := false
-			refs, _ := repo.repository.Storer.IterReferences()
-			branch := strings.Split(opts.Branch, "/")[len(strings.Split(opts.Branch, "/"))-1]
-			err = refs.ForEach(func(refBranch *plumbing.Reference) error {
-				if strings.Split(refBranch.Name().String(), "/")[len(strings.Split(refBranch.Name().String(), "/"))-1] == branch {
-					foundBranch = true
-					ref = refBranch
-				}
-				return nil
-			})
-			if foundBranch == false {
-				return nil, nil
-			}
 		}
-		leaks = auditGitReference(repo, ref)
-	}
+		branchLeaks := auditGitReference(repo, ref)
+		for _, leak := range branchLeaks {
+			leaks = append(leaks, leak)
+		}
+		return nil
+	})
 	return leaks, err
 }
 
@@ -503,6 +456,12 @@ func auditGitReference(repo *RepoDescriptor, ref *plumbing.Reference) []Leak {
 		return nil
 	}
 	err = cIter.ForEach(func(c *object.Commit) error {
+		if commitMap[c.Hash.String()] {
+			return nil
+		}
+		cMutex.Lock()
+		commitMap[c.Hash.String()] = true
+		cMutex.Unlock()
 		if c == nil || c.Hash.String() == opts.Commit || (opts.Depth != 0 && commitCount == opts.Depth) {
 			cIter.Close()
 			return errors.New("ErrStop")
@@ -535,6 +494,9 @@ func auditGitReference(repo *RepoDescriptor, ref *plumbing.Reference) []Leak {
 					return
 				}
 				for _, f := range patch.FilePatches() {
+					if f.IsBinary() {
+						continue
+					}
 					skipFile = false
 					from, to := f.Files()
 					filePath = "???"
@@ -556,14 +518,13 @@ func auditGitReference(repo *RepoDescriptor, ref *plumbing.Reference) []Leak {
 					for _, chunk := range chunks {
 						if chunk.Type() == 1 || chunk.Type() == 2 {
 							diff := gitDiff{
-								branchName: string(ref.Name()),
-								repoName:   repoName,
-								filePath:   filePath,
-								content:    chunk.Content(),
-								sha:        c.Hash.String(),
-								author:     c.Author.String(),
-								message:    c.Message,
-								date:       c.Author.When,
+								repoName: repoName,
+								filePath: filePath,
+								content:  chunk.Content(),
+								sha:      c.Hash.String(),
+								author:   c.Author.String(),
+								message:  c.Message,
+								date:     c.Author.When,
 							}
 							chunkLeaks := inspect(diff)
 							for _, leak := range chunkLeaks {
@@ -651,7 +612,6 @@ func addLeak(leaks []Leak, line string, offender string, leakType string, diff g
 		Type:     leakType,
 		Author:   diff.author,
 		File:     diff.filePath,
-		Branch:   diff.branchName,
 		Repo:     diff.repoName,
 		Message:  diff.message,
 		Date:     diff.date,
@@ -838,7 +798,6 @@ func loadToml() error {
 			regexes[regex.Description] = regexp.MustCompile(regex.Regex)
 		}
 	}
-	whiteListBranches = config.Whitelist.Branches
 	whiteListCommits = make(map[string]bool)
 	for _, commit := range config.Whitelist.Commits {
 		whiteListCommits[commit] = true