Ver código fonte

more refactoring

zach rice 7 anos atrás
pai
commit
ac14d347de
6 arquivos alterados com 146 adições e 187 exclusões
  1. 1 1
      src/constants.go
  2. 7 34
      src/core.go
  3. 10 10
      src/github.go
  4. 2 2
      src/gitlab.go
  5. 115 128
      src/repo.go
  6. 11 12
      src/utils.go

+ 1 - 1
src/constants.go

@@ -1,6 +1,6 @@
 package gitleaks
 
-const version = "1.24.0"
+const version = "1.25.0"
 
 const defaultGithubURL = "https://api.github.com/"
 const defaultThreadNum = 1

+ 7 - 34
src/core.go

@@ -8,37 +8,10 @@ import (
 	"sync"
 	"time"
 
-	"github.com/google/go-github/github"
 	"github.com/hako/durafmt"
 	log "github.com/sirupsen/logrus"
-	"gopkg.in/src-d/go-git.v4/plumbing/object"
 )
 
-// Leak represents a leaked secret or regex match.
-type Leak struct {
-	Line     string    `json:"line"`
-	Commit   string    `json:"commit"`
-	Offender string    `json:"offender"`
-	Type     string    `json:"reason"`
-	Message  string    `json:"commitMsg"`
-	Author   string    `json:"author"`
-	File     string    `json:"file"`
-	Repo     string    `json:"repo"`
-	Date     time.Time `json:"date"`
-}
-
-type gitDiff struct {
-	content      string
-	commit       *object.Commit
-	filePath     string
-	repoName     string
-	githubCommit *github.RepositoryCommit
-	sha          string
-	message      string
-	author       string
-	date         time.Time
-}
-
 var (
 	opts              *Options
 	config            *Config
@@ -81,29 +54,29 @@ func Run(optsL *Options) {
 
 	// start audits
 	if opts.Repo != "" || opts.RepoPath != "" {
-		repoD, err := newRepoInfo()
+		repoInfo, err := newRepoInfo()
 		if err != nil {
 			goto postAudit
 		}
-		err = repoD.clone()
+		err = repoInfo.clone()
 		if err != nil {
 			goto postAudit
 		}
-		leaks, err = repoD.audit()
+		leaks, err = repoInfo.audit()
 	} else if opts.OwnerPath != "" {
 		repoDs, err := discoverRepos(opts.OwnerPath)
 		if err != nil {
 			goto postAudit
 		}
-		for _, repoD := range repoDs {
-			err = repoD.clone()
+		for _, repoInfo := range repoDs {
+			err = repoInfo.clone()
 			if err != nil {
 				continue
 			}
-			leaksFromRepo, err := repoD.audit()
+			leaksFromRepo, err := repoInfo.audit()
 
 			if err != nil {
-				log.Warnf("error occured auditing repo: %s, continuing", repoD.name)
+				log.Warnf("error occured auditing repo: %s, continuing", repoInfo.name)
 			}
 			leaks = append(leaksFromRepo, leaks...)
 		}

+ 10 - 10
src/github.go

@@ -43,13 +43,13 @@ func auditGithubPR() ([]Leak, error) {
 			return leaks, err
 		}
 
-		for _, commit := range commits {
+		for _, c := range commits {
 			totalCommits = totalCommits + 1
-			commit, _, err := githubClient.Repositories.GetCommit(ctx, owner, repo, *commit.SHA)
+			c, _, err := githubClient.Repositories.GetCommit(ctx, owner, repo, *c.SHA)
 			if err != nil {
 				continue
 			}
-			files := commit.Files
+			files := c.Files
 			for _, f := range files {
 				skipFile := false
 				if f.Patch == nil || f.Filename == nil {
@@ -66,17 +66,17 @@ func auditGithubPR() ([]Leak, error) {
 					continue
 				}
 
-				diff := gitDiff{
-					sha:          commit.GetSHA(),
+				commit := commitInfo{
+					sha:          c.GetSHA(),
 					content:      *f.Patch,
 					filePath:     *f.Filename,
 					repoName:     repo,
-					githubCommit: commit,
-					author:       commit.GetCommitter().GetLogin(),
-					message:      *commit.Commit.Message,
-					date:         *commit.Commit.Committer.Date,
+					githubCommit: c,
+					author:       c.GetCommitter().GetLogin(),
+					message:      *c.Commit.Message,
+					date:         *c.Commit.Committer.Date,
 				}
-				leaks = append(leaks, inspect(diff)...)
+				leaks = append(leaks, inspect(commit)...)
 			}
 		}
 		page = resp.NextPage

+ 2 - 2
src/gitlab.go

@@ -78,13 +78,13 @@ func auditGitlabRepos() ([]Leak, error) {
 	}
 
 	for _, p := range repos {
-		repoD, err := cloneGitlabRepo(tempDir, p)
+		repoInfo, err := cloneGitlabRepo(tempDir, p)
 		if err != nil {
 			log.Warn(err)
 			continue
 		}
 
-		leaksFromRepo, err := repoD.audit()
+		leaksFromRepo, err := repoInfo.audit()
 		if err != nil {
 			log.Warn(err)
 		}

+ 115 - 128
src/repo.go

@@ -7,16 +7,30 @@ import (
 	"path/filepath"
 	"strings"
 	"sync"
+	"time"
 
+	"github.com/google/go-github/github"
 	log "github.com/sirupsen/logrus"
 	git "gopkg.in/src-d/go-git.v4"
-	"gopkg.in/src-d/go-git.v4/plumbing"
 	diffType "gopkg.in/src-d/go-git.v4/plumbing/format/diff"
 	"gopkg.in/src-d/go-git.v4/plumbing/object"
 	"gopkg.in/src-d/go-git.v4/plumbing/storer"
 	"gopkg.in/src-d/go-git.v4/storage/memory"
 )
 
+// Leak represents a leaked secret or regex match.
+type Leak struct {
+	Line     string    `json:"line"`
+	Commit   string    `json:"commit"`
+	Offender string    `json:"offender"`
+	Type     string    `json:"reason"`
+	Message  string    `json:"commitMsg"`
+	Author   string    `json:"author"`
+	File     string    `json:"file"`
+	Repo     string    `json:"repo"`
+	Date     time.Time `json:"date"`
+}
+
 // RepoInfo contains a src-d git repository and other data about the repo
 type RepoInfo struct {
 	path       string
@@ -26,6 +40,18 @@ type RepoInfo struct {
 	err        error
 }
 
+type commitInfo struct {
+	content      string
+	commit       *object.Commit
+	filePath     string
+	repoName     string
+	githubCommit *github.RepositoryCommit
+	sha          string
+	message      string
+	author       string
+	date         time.Time
+}
+
 func newRepoInfo() (*RepoInfo, error) {
 	for _, re := range config.WhiteList.repos {
 		if re.FindString(opts.Repo) != "" {
@@ -39,7 +65,8 @@ func newRepoInfo() (*RepoInfo, error) {
 	}, nil
 }
 
-func (repoD *RepoInfo) clone() error {
+// clone will clone a repo
+func (repoInfo *RepoInfo) clone() error {
 	var (
 		err  error
 		repo *git.Repository
@@ -63,9 +90,9 @@ func (repoD *RepoInfo) clone() error {
 				Progress: os.Stdout,
 			})
 		}
-	} else if repoD.path != "" {
+	} else if repoInfo.path != "" {
 		log.Infof("opening %s", opts.RepoPath)
-		repo, err = git.PlainOpen(repoD.path)
+		repo, err = git.PlainOpen(repoInfo.path)
 	} else {
 		// cloning to memory
 		log.Infof("cloning %s", opts.Repo)
@@ -82,66 +109,43 @@ func (repoD *RepoInfo) clone() error {
 			})
 		}
 	}
-	repoD.repository = repo
-	repoD.err = err
+	repoInfo.repository = repo
+	repoInfo.err = err
 	return err
 }
 
-func (repoD *RepoInfo) audit() ([]Leak, error) {
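+// audit iterates over the commits returned by the repository log (LogOptions{All: true}) and collects the leaks found in them.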
+func (repoInfo *RepoInfo) audit() ([]Leak, error) {
 	var (
-		err   error
-		leaks []Leak
+		err         error
+		leaks       []Leak
+		commitCount int64
+		commitWg    sync.WaitGroup
+		mutex       = &sync.Mutex{}
+		semaphore   chan bool
 	)
 	for _, re := range config.WhiteList.repos {
-		if re.FindString(repoD.name) != "" {
-			return leaks, fmt.Errorf("skipping %s, whitelisted", repoD.name)
+		if re.FindString(repoInfo.name) != "" {
+			return leaks, fmt.Errorf("skipping %s, whitelisted", repoInfo.name)
 		}
 	}
 
 	// check if target contains an external gitleaks toml
 	if opts.RepoConfig {
-		err := config.updateFromRepo(repoD)
+		err := config.updateFromRepo(repoInfo)
 		if err != nil {
 			return leaks, nil
 		}
 	}
 
-	// clear commit cache
-	commitMap = make(map[string]bool)
-
-	refs, err := repoD.repository.Storer.IterReferences()
+	// iterate all through commits
+	cIter, err := repoInfo.repository.Log(&git.LogOptions{
+		All: true,
+	})
 	if err != nil {
-		return leaks, err
+		return leaks, nil
 	}
-	err = refs.ForEach(func(ref *plumbing.Reference) error {
-		if ref.Name().IsTag() {
-			return nil
-		}
-		branchLeaks := repoD.auditRef(ref)
-		for _, leak := range branchLeaks {
-			leaks = append(leaks, leak)
-		}
-		return nil
-	})
-	return leaks, err
-}
 
-// auditGitReference beings the audit for a git reference. This function will
-// traverse the git reference and audit each line of each diff.
-func (repoD *RepoInfo) auditRef(ref *plumbing.Reference) []Leak {
-	var (
-		err         error
-		repoName    string
-		leaks       []Leak
-		commitCount int64
-		commitWg    sync.WaitGroup
-		mutex       = &sync.Mutex{}
-		semaphore   chan bool
-	)
-	if auditDone {
-		return nil
-	}
-	repoName = repoD.name
 	if opts.Threads != 0 {
 		threads = opts.Threads
 	}
@@ -150,95 +154,27 @@ func (repoD *RepoInfo) auditRef(ref *plumbing.Reference) []Leak {
 	}
 	semaphore = make(chan bool, threads)
 
-	cIter, err := repoD.repository.Log(&git.LogOptions{From: ref.Hash()})
-	if err != nil {
-		return nil
-	}
 	err = cIter.ForEach(func(c *object.Commit) error {
-		if c == nil || (opts.Depth != 0 && commitCount == opts.Depth) || auditDone {
-			if commitCount == opts.Depth {
-				auditDone = true
-			}
+		if c == nil || (opts.Depth != 0 && commitCount == opts.Depth) {
 			return storer.ErrStop
 		}
-		commitCount = commitCount + 1
+
 		if config.WhiteList.commits[c.Hash.String()] {
 			log.Infof("skipping commit: %s\n", c.Hash.String())
 			return nil
 		}
 
+		commitCount = commitCount + 1
+		totalCommits = totalCommits + 1
+
 		// commits w/o a parent (root of the git ref), or the commit whose hash matches the single-commit option (opts.Commit)
 		if len(c.ParentHashes) == 0 || opts.Commit == c.Hash.String() {
-			if commitMap[c.Hash.String()] {
-				return nil
-			}
-
-			if opts.Commit == c.Hash.String() {
-				auditDone = true
-			}
-
-			cMutex.Lock()
-			commitMap[c.Hash.String()] = true
-			cMutex.Unlock()
-			totalCommits = totalCommits + 1
-
-			fIter, err := c.Files()
-			if err != nil {
-				return nil
-			}
-			err = fIter.ForEach(func(f *object.File) error {
-				bin, err := f.IsBinary()
-				if bin || err != nil {
-					return nil
-				}
-				for _, re := range config.WhiteList.files {
-					if re.FindString(f.Name) != "" {
-						log.Debugf("skipping whitelisted file (matched regex '%s'): %s", re.String(), f.Name)
-						return nil
-					}
-				}
-				content, err := f.Contents()
-				if err != nil {
-					return nil
-				}
-				diff := gitDiff{
-					repoName: repoName,
-					filePath: f.Name,
-					content:  content,
-					sha:      c.Hash.String(),
-					author:   c.Author.String(),
-					message:  strings.Replace(c.Message, "\n", " ", -1),
-					date:     c.Author.When,
-				}
-				fileLeaks := inspect(diff)
-				mutex.Lock()
-				leaks = append(leaks, fileLeaks...)
-				mutex.Unlock()
-				return nil
-			})
+			leaks = append(repoInfo.auditSingleCommit(c, mutex), leaks...)
 			return nil
 		}
 
-		// single commit
-		if opts.Commit != "" {
-			return nil
-		}
-
-		skipCount := false
+		// regular commit audit
 		err = c.Parents().ForEach(func(parent *object.Commit) error {
-			// check if we've seen this diff before
-			if commitMap[c.Hash.String()+parent.Hash.String()] {
-				return nil
-			}
-			cMutex.Lock()
-			commitMap[c.Hash.String()+parent.Hash.String()] = true
-			cMutex.Unlock()
-
-			if !skipCount {
-				totalCommits = totalCommits + 1
-				skipCount = true
-			}
-
 			commitWg.Add(1)
 			semaphore <- true
 			go func(c *object.Commit, parent *object.Commit) {
@@ -283,8 +219,8 @@ func (repoD *RepoInfo) auditRef(ref *plumbing.Reference) []Leak {
 					chunks := f.Chunks()
 					for _, chunk := range chunks {
 						if chunk.Type() == diffType.Add || chunk.Type() == diffType.Delete {
-							diff := gitDiff{
-								repoName: repoName,
+							diff := commitInfo{
+								repoName: repoInfo.name,
 								filePath: filePath,
 								content:  chunk.Content(),
 								sha:      c.Hash.String(),
@@ -306,14 +242,65 @@ func (repoD *RepoInfo) auditRef(ref *plumbing.Reference) []Leak {
 			return nil
 		})
 
-		// stop audit if we are at commitStop
-		if c.Hash.String() == opts.CommitStop {
-			auditDone = true
-			return storer.ErrStop
-		}
-
 		return nil
 	})
+
 	commitWg.Wait()
+	return leaks, nil
+	// // clear commit cache
+	// commitMap = make(map[string]bool)
+
+	// refs, err := repoInfo.repository.Storer.IterReferences()
+	// if err != nil {
+	// 	return leaks, err
+	// }
+	// err = refs.ForEach(func(ref *plumbing.Reference) error {
+	// 	if ref.Name().IsTag() {
+	// 		return nil
+	// 	}
+	// 	branchLeaks := repoInfo.auditRef(ref)
+	// 	for _, leak := range branchLeaks {
+	// 		leaks = append(leaks, leak)
+	// 	}
+	// 	return nil
+	// })
+}
+
+func (repoInfo *RepoInfo) auditSingleCommit(c *object.Commit, mutex *sync.Mutex) []Leak {
+	var leaks []Leak
+	fIter, err := c.Files()
+	if err != nil {
+		return nil
+	}
+	err = fIter.ForEach(func(f *object.File) error {
+		bin, err := f.IsBinary()
+		if bin || err != nil {
+			return nil
+		}
+		for _, re := range config.WhiteList.files {
+			if re.FindString(f.Name) != "" {
+				log.Debugf("skipping whitelisted file (matched regex '%s'): %s", re.String(), f.Name)
+				return nil
+			}
+		}
+		content, err := f.Contents()
+		if err != nil {
+			return nil
+		}
+		diff := commitInfo{
+			repoName: repoInfo.name,
+			filePath: f.Name,
+			content:  content,
+			sha:      c.Hash.String(),
+			author:   c.Author.String(),
+			message:  strings.Replace(c.Message, "\n", " ", -1),
+			date:     c.Author.When,
+		}
+		fileLeaks := inspect(diff)
+		mutex.Lock()
+		leaks = append(leaks, fileLeaks...)
+		mutex.Unlock()
+		return nil
+	})
 	return leaks
 }

+ 11 - 12
src/utils.go

@@ -73,13 +73,13 @@ func writeReport(leaks []Leak) error {
 // a set of regexes set by the config (see gitleaks.toml for example). This function
 // will skip lines that include a whitelisted regex. A list of leaks is returned.
 // If verbose mode (-v/--verbose) is set, then inspect will log leaks as they are discovered.
-func inspect(diff gitDiff) []Leak {
+func inspect(commit commitInfo) []Leak {
 	var (
 		leaks    []Leak
 		skipLine bool
 	)
 
-	lines := strings.Split(diff.content, "\n")
+	lines := strings.Split(commit.content, "\n")
 
 	for _, line := range lines {
 		skipLine = false
@@ -91,7 +91,7 @@ func inspect(diff gitDiff) []Leak {
 			if skipLine = isLineWhitelisted(line); skipLine {
 				break
 			}
-			leaks = addLeak(leaks, line, match, re.description, diff)
+			leaks = addLeak(leaks, line, match, re.description, commit)
 		}
 
 		if !skipLine && (opts.Entropy > 0 || len(config.Entropy.entropyRanges) != 0) {
@@ -108,7 +108,7 @@ func inspect(diff gitDiff) []Leak {
 				if skipLine = !highEntropyLineIsALeak(line) || isLineWhitelisted(line); skipLine {
 					break
 				}
-				leaks = addLeak(leaks, line, word, fmt.Sprintf("Entropy: %.2f", entropy), diff)
+				leaks = addLeak(leaks, line, word, fmt.Sprintf("Entropy: %.2f", entropy), commit)
 			}
 		}
 	}
@@ -127,17 +127,17 @@ func isLineWhitelisted(line string) bool {
 }
 
 // addLeak is a helper for func inspect() to append leaks if found during a diff check.
-func addLeak(leaks []Leak, line string, offender string, leakType string, diff gitDiff) []Leak {
+func addLeak(leaks []Leak, line string, offender string, leakType string, commit commitInfo) []Leak {
 	leak := Leak{
 		Line:     line,
-		Commit:   diff.sha,
+		Commit:   commit.sha,
 		Offender: offender,
 		Type:     leakType,
-		Author:   diff.author,
-		File:     diff.filePath,
-		Repo:     diff.repoName,
-		Message:  diff.message,
-		Date:     diff.date,
+		Author:   commit.author,
+		File:     commit.filePath,
+		Repo:     commit.repoName,
+		Message:  commit.message,
+		Date:     commit.date,
 	}
 	if opts.Redact {
 		leak.Offender = "REDACTED"
@@ -182,7 +182,6 @@ func (leak Leak) log() {
 
 func containsGit(repoPath string) bool {
 	if _, err := os.Stat(repoPath); os.IsNotExist(err) {
-		// path/to/whatever does not exist
 		return false
 	}
 	return true