zach rice 7 лет назад
Родитель
Commit
0726209172
4 измененных файлов с 185 добавлено и 250 удалено
  1. 4 0
      CHANGELOG.md
  2. 1 1
      gitleaks.toml
  3. 31 130
      gitleaks_test.go
  4. 149 119
      main.go

+ 4 - 0
CHANGELOG.md

@@ -1,6 +1,10 @@
 CHANGELOG
 =========
 
+1.7.3
+-----
+- style points
+
 1.7.2
 -----
 - Fixing dangling goroutines, removing channel messaging

+ 1 - 1
gitleaks.toml

@@ -23,7 +23,7 @@ regex = '''(?i)twitter.*['\"][0-9a-zA-Z]{35,44}['\"]'''
 
 [whitelist]
 regexes = [
-  "AKIA.*",
+  # "AKIA.*EXAMPLE",
 ]
 files = [
   "(.*?)(jpg|gif|doc|pdf|bin|go)$"

+ 31 - 130
gitleaks_test.go

@@ -56,30 +56,30 @@ regexes= [
 ]
 `
 
-var benchmarkRepo *Repo
-var benchmarkLeaksRepo *Repo
+var benchmarkRepo *RepoDescriptor
+var benchmarkLeaksRepo *RepoDescriptor
 
-func getBenchmarkLeaksRepo() *Repo {
+func getBenchmarkLeaksRepo() *RepoDescriptor {
 	if benchmarkLeaksRepo != nil {
 		return benchmarkLeaksRepo
 	}
 	leaksR, _ := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
 		URL: "https://github.com/gitleakstest/gronit.git",
 	})
-	benchmarkLeaksRepo = &Repo{
+	benchmarkLeaksRepo = &RepoDescriptor{
 		repository: leaksR,
 	}
 	return benchmarkLeaksRepo
 }
 
-func getBenchmarkRepo() *Repo {
+func getBenchmarkRepo() *RepoDescriptor {
 	if benchmarkRepo != nil {
 		return benchmarkRepo
 	}
 	bmRepo, _ := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
 		URL: "https://github.com/apple/swift-package-manager.git",
 	})
-	benchmarkRepo = &Repo{
+	benchmarkRepo = &RepoDescriptor{
 		repository: bmRepo,
 	}
 	return benchmarkRepo
@@ -157,7 +157,7 @@ func TestGetRepo(t *testing.T) {
 		g.Describe("TestGetRepo", func() {
 			g.It(test.description, func() {
 				opts = test.testOpts
-				_, err := getRepo()
+				_, err := cloneRepo()
 				if err != nil {
 					g.Assert(err.Error()).Equal(test.expectedErrMsg)
 				}
@@ -165,7 +165,7 @@ func TestGetRepo(t *testing.T) {
 		})
 	}
 }
-func TestRunAudit(t *testing.T) {
+func TestRun(t *testing.T) {
 	err := loadToml()
 	configsDir := testTomlLoader()
 	defer os.RemoveAll(configsDir)
@@ -266,7 +266,7 @@ func TestRunAudit(t *testing.T) {
 	}
 	g := goblin.Goblin(t)
 	for _, test := range tests {
-		g.Describe("TestRunAudit", func() {
+		g.Describe("TestRun", func() {
 			g.It(test.description, func() {
 				opts = test.testOpts
 				leaks, err := run()
@@ -279,105 +279,6 @@ func TestRunAudit(t *testing.T) {
 	}
 }
 
-func TestStartAudit(t *testing.T) {
-	err := loadToml()
-	configsDir := testTomlLoader()
-	defer os.RemoveAll(configsDir)
-
-	dir, err = ioutil.TempDir("", "gitleaksTestOwner")
-	defer os.RemoveAll(dir)
-	if err != nil {
-		panic(err)
-	}
-	git.PlainClone(dir+"/gronit", false, &git.CloneOptions{
-		URL: "https://github.com/gitleakstest/gronit",
-	})
-	git.PlainClone(dir+"/h1domains", false, &git.CloneOptions{
-		URL: "https://github.com/gitleakstest/h1domains",
-	})
-	var tests = []struct {
-		testOpts       Options
-		description    string
-		expectedErrMsg string
-		numLeaks       int
-	}{
-		{
-			testOpts: Options{
-				GithubUser: "gitleakstest",
-			},
-			description:    "test github user",
-			numLeaks:       2,
-			expectedErrMsg: "",
-		},
-		{
-			testOpts: Options{
-				GithubUser: "gitleakstest",
-				Disk:       true,
-			},
-			description:    "test github user on disk ",
-			numLeaks:       2,
-			expectedErrMsg: "",
-		},
-		{
-			testOpts: Options{
-				GithubOrg: "gitleakstestorg",
-			},
-			description:    "test github org",
-			numLeaks:       2,
-			expectedErrMsg: "",
-		},
-		{
-			testOpts: Options{
-				GithubOrg:      "gitleakstestorg",
-				IncludePrivate: true,
-			},
-			description:    "test private org no ssh",
-			numLeaks:       0,
-			expectedErrMsg: "no ssh auth available",
-		},
-		{
-			testOpts: Options{
-				GithubOrg: "gitleakstestorg",
-				Disk:      true,
-			},
-			description:    "test org on disk",
-			numLeaks:       2,
-			expectedErrMsg: "",
-		},
-		{
-			testOpts: Options{
-				GithubOrg:      "gitleakstestorg",
-				IncludePrivate: true,
-				Disk:           true,
-			},
-			description:    "test private org on disk no ssh",
-			numLeaks:       0,
-			expectedErrMsg: "no ssh auth available",
-		},
-		{
-			testOpts: Options{
-				OwnerPath: dir,
-			},
-			description:    "test owner path",
-			numLeaks:       2,
-			expectedErrMsg: "",
-		},
-	}
-	g := goblin.Goblin(t)
-	for _, test := range tests {
-		g.Describe("TestStartAudit", func() {
-			g.It(test.description, func() {
-				opts = test.testOpts
-				leaks, err := startAudits()
-				if err != nil {
-					g.Assert(err.Error()).Equal(test.expectedErrMsg)
-				}
-				g.Assert(len(leaks)).Equal(test.numLeaks)
-			})
-		})
-	}
-}
-
 func TestWriteReport(t *testing.T) {
 	tmpDir, _ := ioutil.TempDir("", "reportDir")
 	reportJSON := path.Join(tmpDir, "report.json")
@@ -461,7 +362,7 @@ func TestAuditRepo(t *testing.T) {
 	if err != nil {
 		panic(err)
 	}
-	leaksRepo := Repo{
+	leaksRepo := &RepoDescriptor{
 		repository: leaksR,
 		name:       "gronit",
 	}
@@ -472,7 +373,7 @@ func TestAuditRepo(t *testing.T) {
 	if err != nil {
 		panic(err)
 	}
-	cleanRepo := Repo{
+	cleanRepo := &RepoDescriptor{
 		repository: cleanR,
 		name:       "h1domains",
 	}
@@ -482,7 +383,7 @@ func TestAuditRepo(t *testing.T) {
 		description       string
 		expectedErrMsg    string
 		numLeaks          int
-		repo              Repo
+		repo              *RepoDescriptor
 		whiteListFiles    []*regexp.Regexp
 		whiteListCommits  map[string]bool
 		whiteListBranches []string
@@ -646,7 +547,7 @@ func TestAuditRepo(t *testing.T) {
 					loadToml()
 				}
 
-				leaks, err = auditRepo(test.repo)
+				leaks, err = auditGitRepo(test.repo)
 
 				if opts.Redact {
 					g.Assert(leaks[0].Offender).Equal("REDACTED")
@@ -838,7 +739,7 @@ func BenchmarkAuditRepo1Proc(b *testing.B) {
 	opts.MaxGoRoutines = 1
 	benchmarkRepo = getBenchmarkRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 
@@ -847,7 +748,7 @@ func BenchmarkAuditRepo2Proc(b *testing.B) {
 	opts.MaxGoRoutines = 2
 	benchmarkRepo = getBenchmarkRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 
@@ -856,7 +757,7 @@ func BenchmarkAuditRepo4Proc(b *testing.B) {
 	opts.MaxGoRoutines = 4
 	benchmarkRepo = getBenchmarkRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 
@@ -865,7 +766,7 @@ func BenchmarkAuditRepo8Proc(b *testing.B) {
 	opts.MaxGoRoutines = 8
 	benchmarkRepo = getBenchmarkRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 
@@ -874,7 +775,7 @@ func BenchmarkAuditRepo10Proc(b *testing.B) {
 	opts.MaxGoRoutines = 10
 	benchmarkRepo = getBenchmarkRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 
@@ -883,7 +784,7 @@ func BenchmarkAuditRepo100Proc(b *testing.B) {
 	opts.MaxGoRoutines = 100
 	benchmarkRepo = getBenchmarkRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 
@@ -892,7 +793,7 @@ func BenchmarkAuditRepo1000Proc(b *testing.B) {
 	opts.MaxGoRoutines = 1000
 	benchmarkRepo = getBenchmarkRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 func BenchmarkAuditRepo10000Proc(b *testing.B) {
@@ -900,7 +801,7 @@ func BenchmarkAuditRepo10000Proc(b *testing.B) {
 	opts.MaxGoRoutines = 10000
 	benchmarkRepo = getBenchmarkRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 func BenchmarkAuditRepo100000Proc(b *testing.B) {
@@ -908,7 +809,7 @@ func BenchmarkAuditRepo100000Proc(b *testing.B) {
 	opts.MaxGoRoutines = 100000
 	benchmarkRepo = getBenchmarkRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 func BenchmarkAuditLeakRepo1Proc(b *testing.B) {
@@ -916,7 +817,7 @@ func BenchmarkAuditLeakRepo1Proc(b *testing.B) {
 	opts.MaxGoRoutines = 1
 	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkLeaksRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 
@@ -925,7 +826,7 @@ func BenchmarkAuditLeakRepo2Proc(b *testing.B) {
 	opts.MaxGoRoutines = 2
 	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkLeaksRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 
@@ -934,7 +835,7 @@ func BenchmarkAuditLeakRepo4Proc(b *testing.B) {
 	opts.MaxGoRoutines = 4
 	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkLeaksRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 
@@ -943,7 +844,7 @@ func BenchmarkAuditLeakRepo8Proc(b *testing.B) {
 	opts.MaxGoRoutines = 8
 	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkLeaksRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 
@@ -952,7 +853,7 @@ func BenchmarkAuditLeakRepo10Proc(b *testing.B) {
 	opts.MaxGoRoutines = 10
 	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkLeaksRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 func BenchmarkAuditLeakRepo100Proc(b *testing.B) {
@@ -960,7 +861,7 @@ func BenchmarkAuditLeakRepo100Proc(b *testing.B) {
 	opts.MaxGoRoutines = 100
 	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkLeaksRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 func BenchmarkAuditLeakRepo1000Proc(b *testing.B) {
@@ -968,7 +869,7 @@ func BenchmarkAuditLeakRepo1000Proc(b *testing.B) {
 	opts.MaxGoRoutines = 1000
 	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkLeaksRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 
@@ -977,7 +878,7 @@ func BenchmarkAuditLeakRepo10000Proc(b *testing.B) {
 	opts.MaxGoRoutines = 10000
 	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkLeaksRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }
 
@@ -986,6 +887,6 @@ func BenchmarkAuditLeakRepo100000Proc(b *testing.B) {
 	opts.MaxGoRoutines = 100000
 	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
 	for n := 0; n < b.N; n++ {
-		auditRepo(*benchmarkLeaksRepo)
+		auditGitRepo(benchmarkRepo)
 	}
 }

+ 149 - 119
main.go

@@ -35,7 +35,7 @@ import (
 )
 
 // Leak represents a leaked secret or regex match.
-// Leak can output to stdout as json if the --verbose option is set or
+// Output to stdout as json if the --verbose option is set or
 // as a csv if the --csv and --report options are set.
 type Leak struct {
 	Line     string `json:"line"`
@@ -58,13 +58,6 @@ type RepoDescriptor struct {
 	err        error
 }
 
-// Owner contains a collection of repos. This could represent an org or user.
-type Owner struct {
-	path  string
-	url   string
-	repos []RepoDescriptor
-}
-
 // Options for gitleaks
 type Options struct {
 	// remote target options
@@ -119,8 +112,16 @@ type Config struct {
 	}
 }
 
+type gitDiff struct {
+	content    string
+	commit     *object.Commit
+	filePath   string
+	branchName string
+	repoName   string
+}
+
 const defaultGithubURL = "https://api.github.com/"
-const version = "1.7.2"
+const version = "1.7.3"
 const errExit = 2
 const leakExit = 1
 const defaultConfig = `
@@ -241,11 +242,11 @@ func run() ([]Leak, error) {
 	// start audits
 	if opts.Repo != "" || opts.RepoPath != "" {
 		// Audit a single remote repo or a local repo.
-		repo, err := getRepo()
+		repo, err := cloneRepo()
 		if err != nil {
 			return leaks, err
 		}
-		return auditRepo(repo)
+		return auditGitRepo(repo)
 	} else if opts.OwnerPath != "" {
 		// Audit local repos. Gitleaks will look for all child directories of OwnerPath for
 		// git repos and perform an audit on said repos.
@@ -254,7 +255,7 @@ func run() ([]Leak, error) {
 			return leaks, err
 		}
 		for _, repo := range repos {
-			leaksFromRepo, err := auditRepo(repo)
+			leaksFromRepo, err := auditGitRepo(repo)
 			if err != nil {
 				return leaks, err
 			}
@@ -295,10 +296,10 @@ func writeReport(leaks []Leak) error {
 	return err
 }
 
-// getRepoDescriptor clones a repo to memory(default) or to disk if the --disk option is set. If you want to
+// cloneRepo clones a repo to memory(default) or to disk if the --disk option is set. If you want to
 // clone a private repo you must set the --private/-p option, use a ssh target, and have your ssh keys
 // configured. If you want to audit a local repo, cloneRepo will load up a repo located at --repo-path
-func getRepoDescriptor() (RepoDescriptor, error) {
+func cloneRepo() (*RepoDescriptor, error) {
 	var (
 		err  error
 		repo *git.Repository
@@ -336,7 +337,7 @@ func getRepoDescriptor() (RepoDescriptor, error) {
 			})
 		}
 	}
-	return RepoDescriptor{
+	return &RepoDescriptor{
 		repository: repo,
 		path:       opts.RepoPath,
 		url:        opts.Repo,
@@ -345,10 +346,66 @@ func getRepoDescriptor() (RepoDescriptor, error) {
 	}, nil
 }
 
+// auditGitRepo begins an audit on a git repository by checking the default HEAD branch, all branches, or
+// a single branch depending on what gitleaks is configured to do. Note when I say branch I really
+// mean reference as these branches are read only.
+func auditGitRepo(repo *RepoDescriptor) ([]Leak, error) {
+	var (
+		err   error
+		leaks []Leak
+	)
+	ref, err := repo.repository.Head()
+	if err != nil {
+		return leaks, err
+	}
+
+	if opts.AuditAllRefs {
+		skipBranch := false
+		refs, err := repo.repository.Storer.IterReferences()
+		if err != nil {
+			return leaks, err
+		}
+		err = refs.ForEach(func(ref *plumbing.Reference) error {
+			for _, b := range whiteListBranches {
+				if strings.HasSuffix(string(ref.Name()), b) {
+					skipBranch = true
+				}
+			}
+			if skipBranch {
+				skipBranch = false
+				return nil
+			}
+			branchLeaks := auditGitReference(repo, ref)
+			for _, leak := range branchLeaks {
+				leaks = append(leaks, leak)
+			}
+			return nil
+		})
+	} else {
+		if opts.Branch != "" {
+			foundBranch := false
+			refs, _ := repo.repository.Storer.IterReferences()
+			branch := strings.Split(opts.Branch, "/")[len(strings.Split(opts.Branch, "/"))-1]
+			err = refs.ForEach(func(refBranch *plumbing.Reference) error {
+				if strings.Split(refBranch.Name().String(), "/")[len(strings.Split(refBranch.Name().String(), "/"))-1] == branch {
+					foundBranch = true
+					ref = refBranch
+				}
+				return nil
+			})
+			if foundBranch == false {
+				return nil, nil
+			}
+		}
+		leaks = auditGitReference(repo, ref)
+	}
+	return leaks, err
+}
+
 // auditGitReference begins the audit for a git reference. This function will
 // traverse the git reference and audit each line of each diff. Set maximum concurrency with
 // the --max-go option (default is set to the number of cores on your cpu).
-func auditGitReference(repo RepoDescriptor, ref *plumbing.Reference) []Leak {
+func auditGitReference(repo *RepoDescriptor, ref *plumbing.Reference) []Leak {
 	var (
 		err        error
 		prevCommit *object.Commit
@@ -390,6 +447,11 @@ func auditGitReference(repo RepoDescriptor, ref *plumbing.Reference) []Leak {
 					log.Warnf("recoverying from panic on commit %s, likely large diff causing panic", c.Hash.String())
 				}
 			}()
+			diff := gitDiff{
+				commit:     prevCommit,
+				branchName: string(ref.Name()),
+				repoName:   repoName,
+			}
 
 			if prevCommit == nil {
 				t, _ := c.Tree()
@@ -399,7 +461,10 @@ func auditGitReference(repo RepoDescriptor, ref *plumbing.Reference) []Leak {
 					if err != nil {
 						return err
 					}
-					chunkLeaks := checkDiff(content, c, file.Name, string(ref.Name()), repoName)
+					diff.filePath = file.Name
+					diff.content = content
+					diff.commit = c
+					chunkLeaks := inspect(diff)
 					for _, leak := range chunkLeaks {
 						mutex.Lock()
 						leaks = append(leaks, leak)
@@ -426,6 +491,7 @@ func auditGitReference(repo RepoDescriptor, ref *plumbing.Reference) []Leak {
 					} else if to != nil {
 						filePath = to.Path()
 					}
+					diff.filePath = filePath
 					for _, re := range whiteListFiles {
 						if re.FindString(filePath) != "" {
 							skipFile = true
@@ -438,8 +504,8 @@ func auditGitReference(repo RepoDescriptor, ref *plumbing.Reference) []Leak {
 					chunks := f.Chunks()
 					for _, chunk := range chunks {
 						if chunk.Type() == 1 || chunk.Type() == 2 {
-							// only check if adding or removing
-							chunkLeaks := checkDiff(chunk.Content(), prevCommit, filePath, string(ref.Name()), repoName)
+							diff.content = chunk.Content()
+							chunkLeaks := inspect(diff)
 							for _, leak := range chunkLeaks {
 								mutex.Lock()
 								leaks = append(leaks, leak)
@@ -457,66 +523,12 @@ func auditGitReference(repo RepoDescriptor, ref *plumbing.Reference) []Leak {
 	return leaks
 }
 
-// auditGitRepo beings an audit on a git repository by checking the default HEAD branch, all branches, or
-// a single branch depending on what gitleaks is configured to do.
-func auditGitRepo(repo RepoDescriptor) ([]Leak, error) {
-	var (
-		err   error
-		leaks []Leak
-	)
-	ref, err := repo.repository.Head()
-	if err != nil {
-		return leaks, err
-	}
-
-	if opts.AuditAllRefs {
-		skipBranch := false
-		refs, err := repo.repository.Storer.IterReferences()
-		if err != nil {
-			return leaks, err
-		}
-		err = refs.ForEach(func(ref *plumbing.Reference) error {
-			for _, b := range whiteListBranches {
-				if strings.HasSuffix(string(ref.Name()), b) {
-					skipBranch = true
-				}
-			}
-			if skipBranch {
-				skipBranch = false
-				return nil
-			}
-			branchLeaks := auditGitReference(repo, ref)
-			for _, leak := range branchLeaks {
-				leaks = append(leaks, leak)
-			}
-			return nil
-		})
-	} else {
-		if opts.Branch != "" {
-			foundBranch := false
-			refs, _ := repo.repository.Storer.IterReferences()
-			branch := strings.Split(opts.Branch, "/")[len(strings.Split(opts.Branch, "/"))-1]
-			err = refs.ForEach(func(refBranch *plumbing.Reference) error {
-				if strings.Split(refBranch.Name().String(), "/")[len(strings.Split(refBranch.Name().String(), "/"))-1] == branch {
-					foundBranch = true
-					ref = refBranch
-				}
-				return nil
-			})
-			if foundBranch == false {
-				return nil, nil
-			}
-		}
-		leaks = auditGitReference(repo, ref)
-	}
-	return leaks, err
-}
-
-// checkDiff accepts a string diff and commit object then performs a
-// regex check
-// checkDiff
-func checkDiff(diff string, commit *object.Commit, filePath string, branch string, repo string) []Leak {
-	lines := strings.Split(diff, "\n")
+// inspect will parse each line of the git diff's content against a set of regexes or
+// a set of regexes set by the config (see gitleaks.toml for example). This function
+// will skip lines that include a whitelisted regex. A list of leaks is returned.
+// If verbose mode (-v/--verbose) is set, then inspect will log leaks as they are discovered.
+func inspect(diff gitDiff) []Leak {
+	lines := strings.Split(diff.content, "\n")
 	var (
 		leaks    []Leak
 		skipLine bool
@@ -544,14 +556,14 @@ func checkDiff(diff string, commit *object.Commit, filePath string, branch strin
 
 			leak := Leak{
 				Line:     line,
-				Commit:   commit.Hash.String(),
+				Commit:   diff.commit.Hash.String(),
 				Offender: match,
 				Type:     leakType,
-				Message:  commit.Message,
-				Author:   commit.Author.String(),
-				File:     filePath,
-				Branch:   branch,
-				Repo:     repo,
+				Message:  diff.commit.Message,
+				Author:   diff.commit.Author.String(),
+				File:     diff.filePath,
+				Branch:   diff.branchName,
+				Repo:     diff.repoName,
 			}
 			if opts.Redact {
 				leak.Offender = "REDACTED"
@@ -566,18 +578,22 @@ func checkDiff(diff string, commit *object.Commit, filePath string, branch strin
 	return leaks
 }
 
-// auditOwner audits all of the owner's(user or org) repos
+// auditGithubRepos kicks off audits if --github-user or --github-org options are set.
+// First, we gather all the github repositories from the github api (this doesn't actually clone the repo).
+// After all the repos have been pulled from github's api we proceed to clone each repo with cloneGithubRepo and audit it with auditGitRepo.
+// If an error occurs during an audit of a repo, that error is logged but won't break the execution cycle.
 func auditGithubRepos() ([]Leak, error) {
 	var (
 		err              error
 		githubRepos      []*github.Repository
-		rs               []*github.Repository
+		pagedGithubRepos []*github.Repository
 		resp             *github.Response
 		githubClient     *github.Client
 		githubOrgOptions *github.RepositoryListByOrgOptions
 		githubOptions    *github.RepositoryListOptions
 		done             bool
 		leaks            []Leak
+		ownerDir         string
 	)
 	ctx := context.Background()
 
@@ -611,40 +627,52 @@ func auditGithubRepos() ([]Leak, error) {
 		}
 		if opts.GithubUser != "" {
 			if opts.IncludePrivate {
-				rs, resp, err = githubClient.Repositories.List(ctx, "", githubOptions)
+				pagedGithubRepos, resp, err = githubClient.Repositories.List(ctx, "", githubOptions)
 			} else {
-				rs, resp, err = githubClient.Repositories.List(ctx, opts.GithubUser, githubOptions)
+				pagedGithubRepos, resp, err = githubClient.Repositories.List(ctx, opts.GithubUser, githubOptions)
 			}
 			if err != nil {
 				done = true
 			}
 			githubOptions.Page = resp.NextPage
-			githubRepos = append(githubRepos, rs...)
+			githubRepos = append(githubRepos, pagedGithubRepos...)
 			if resp.NextPage == 0 {
 				done = true
 			}
 		} else if opts.GithubOrg != "" {
-			rs, resp, err = githubClient.Repositories.ListByOrg(ctx, opts.GithubOrg, githubOrgOptions)
+			pagedGithubRepos, resp, err = githubClient.Repositories.ListByOrg(ctx, opts.GithubOrg, githubOrgOptions)
 			if err != nil {
 				done = true
 			}
 			githubOrgOptions.Page = resp.NextPage
-			githubRepos = append(githubRepos, rs...)
+			githubRepos = append(githubRepos, pagedGithubRepos...)
 			if resp.NextPage == 0 {
 				done = true
 			}
 		}
 		if opts.Log == "Debug" || opts.Log == "debug" {
-			for _, githubRepo := range rs {
+			for _, githubRepo := range pagedGithubRepos {
 				log.Debugf("staging repos %s", *githubRepo.Name)
 			}
 		}
 	}
 	if err != nil {
-		return leaks, err
+		return nil, err
+	}
+	if opts.Disk {
+		ownerDir, err = ioutil.TempDir(dir, opts.GithubUser)
+		os.RemoveAll(ownerDir)
 	}
 	for _, githubRepo := range githubRepos {
-		leaksFromRepo, err := auditGithubRepo(githubRepo)
+		repo, err := cloneGithubRepo(githubRepo)
+		if err != nil {
+			log.Warn(err)
+			continue
+		}
+		leaksFromRepo, err := auditGitRepo(repo)
+		if opts.Disk {
+			os.RemoveAll(fmt.Sprintf("%s/%s", ownerDir, *githubRepo.Name))
+		}
 		if len(leaksFromRepo) == 0 {
 			log.Infof("no leaks found for repo %s", *githubRepo.Name)
 		} else {
@@ -655,27 +683,26 @@ func auditGithubRepos() ([]Leak, error) {
 		}
 		leaks = append(leaks, leaksFromRepo...)
 	}
-	fmt.Println(leaks)
 	return leaks, nil
 }
 
-// auditGithubRepo clones repos from github
-func auditGithubRepo(githubRepo *github.Repository) ([]Leak, error) {
+// cloneGithubRepo clones a repo from the url parsed from a github repo. The repo
+// will be cloned to disk if --disk is set. If the repo is private, you must include the
+// --private/-p option. Once the repo is cloned, a RepoDescriptor for it is returned to the caller.
+func cloneGithubRepo(githubRepo *github.Repository) (*RepoDescriptor, error) {
 	var (
-		leaks []Leak
-		repo  *git.Repository
-		err   error
+		repo *git.Repository
+		err  error
 	)
 	log.Infof("cloning: %s", *githubRepo.Name)
 	if opts.Disk {
 		ownerDir, err := ioutil.TempDir(dir, opts.GithubUser)
-		defer os.RemoveAll(fmt.Sprintf("%s/%s", ownerDir, *githubRepo.Name))
 		if err != nil {
-			return leaks, fmt.Errorf("unable to generater owner temp dir: %v", err)
+			return nil, fmt.Errorf("unable to generater owner temp dir: %v", err)
 		}
 		if opts.IncludePrivate {
 			if sshAuth == nil {
-				return leaks, fmt.Errorf("no ssh auth available")
+				return nil, fmt.Errorf("no ssh auth available")
 			}
 			repo, err = git.PlainClone(fmt.Sprintf("%s/%s", ownerDir, *githubRepo.Name), false, &git.CloneOptions{
 				URL:  *githubRepo.SSHURL,
@@ -689,7 +716,7 @@ func auditGithubRepo(githubRepo *github.Repository) ([]Leak, error) {
 	} else {
 		if opts.IncludePrivate {
 			if sshAuth == nil {
-				return leaks, fmt.Errorf("no ssh auth available")
+				return nil, fmt.Errorf("no ssh auth available")
 			}
 			repo, err = git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
 				URL:  *githubRepo.SSHURL,
@@ -702,15 +729,16 @@ func auditGithubRepo(githubRepo *github.Repository) ([]Leak, error) {
 		}
 	}
 	if err != nil {
-		return leaks, err
+		return nil, err
 	}
-	return auditRepo(Repo{
+	return &RepoDescriptor{
 		repository: repo,
 		name:       *githubRepo.Name,
-	})
+	}, nil
 }
 
-// githubToken returns a oauth2 client for the github api to consume
+// githubToken returns an oauth2 client for the github api to consume. This token is necessary
+// if you are running audits with --github-user or --github-org
 func githubToken() *http.Client {
 	githubToken := os.Getenv("GITHUB_TOKEN")
 	if githubToken == "" {
@@ -723,11 +751,11 @@ func githubToken() *http.Client {
 }
 
 // discoverRepos walks all the children of `path`. If a child directory
-// contain a .git file then that repo will be added
-func discoverRepos(ownerPath string) ([]Repo, error) {
+// contains a .git file then that repo will be added to the list of repos returned
+func discoverRepos(ownerPath string) ([]*RepoDescriptor, error) {
 	var (
 		err   error
-		repos []Repo
+		repos []*RepoDescriptor
 	)
 	files, err := ioutil.ReadDir(ownerPath)
 	if err != nil {
@@ -740,7 +768,7 @@ func discoverRepos(ownerPath string) ([]Repo, error) {
 			if err != nil {
 				continue
 			}
-			repos = append(repos, Repo{
+			repos = append(repos, &RepoDescriptor{
 				repository: r,
 				name:       f.Name(),
 				path:       repoPath,
@@ -811,9 +839,11 @@ func optsGuard() error {
 	return nil
 }
 
-// loadToml loads of the toml config containing regexes and whitelists
-// 1. look for config path
-// 2. two, look for gitleaks config env var
+// loadToml loads the toml config containing regexes and whitelists.
+// This function will first check whether configPath is set and load the config
+// from that file. Otherwise it will look for the path set by the GITLEAKS_CONFIG
+// env var. If that is not set, then gitleaks will continue with the default config
+// specified by the `defaultConfig` const at the top of the file.
 func loadToml() error {
 	var (
 		config     Config
@@ -840,9 +870,7 @@ func loadToml() error {
 		}
 	}
 
-	// load up regexes
 	if singleSearchRegex != nil {
-		// single search takes precedence over default regex
 		regexes["singleSearch"] = singleSearchRegex
 	} else {
 		for _, regex := range config.Regexes {
@@ -864,7 +892,9 @@ func loadToml() error {
 	return nil
 }
 
-// getSSHAuth generates ssh auth
+// getSSHAuth returns an ssh auth used by go-git to clone repos behind authentication.
+// If --ssh-key is set then it will attempt to load the key from that path. If not,
+// gitleaks will use the default $HOME/.ssh/id_rsa key
 func getSSHAuth() (*ssh.PublicKeys, error) {
 	var (
 		sshKeyPath string