소스 검색

refactor: fix #722 properly (#1250)

* refactor: fix #722 properly

* fix(deps): update go-gitdiff to v0.9.0

* refactor: second attempt

* refactor: better git package api

* fix: add comments
Savely Krasovsky 2 년 전
부모
커밋
45266551ba
4개의 변경된 파일124개의 추가작업 그리고 73개의 파일을 삭제
  1. 49 37
      detect/detect.go
  2. 72 28
      detect/git/git.go
  3. 1 1
      go.mod
  4. 2 7
      go.sum

+ 49 - 37
detect/detect.go

@@ -357,70 +357,85 @@ func (d *Detector) detectRule(fragment Fragment, rule config.Rule) []report.Find
 	return findings
 }
 
-// GitScan accepts a *gitdiff.File channel which contents a git history generated from
-// the output of `git log -p ...`. startGitScan will look at each file (patch) in the history
-// and determine if the patch contains any findings.
+// DetectGit accepts source directory, log opts and GitScanType and returns a slice of report.Finding.
 func (d *Detector) DetectGit(source string, logOpts string, gitScanType GitScanType) ([]report.Finding, error) {
 	var (
-		gitdiffFiles <-chan *gitdiff.File
+		diffFilesCmd *git.DiffFilesCmd
 		err          error
 	)
 	switch gitScanType {
 	case DetectType:
-		gitdiffFiles, err = git.GitLog(source, logOpts)
+		diffFilesCmd, err = git.NewGitLogCmd(source, logOpts)
 		if err != nil {
 			return d.findings, err
 		}
 	case ProtectType:
-		gitdiffFiles, err = git.GitDiff(source, false)
+		diffFilesCmd, err = git.NewGitDiffCmd(source, false)
 		if err != nil {
 			return d.findings, err
 		}
 	case ProtectStagedType:
-		gitdiffFiles, err = git.GitDiff(source, true)
+		diffFilesCmd, err = git.NewGitDiffCmd(source, true)
 		if err != nil {
 			return d.findings, err
 		}
 	}
+	defer diffFilesCmd.Wait()
+	diffFilesCh := diffFilesCmd.DiffFilesCh()
+	errCh := diffFilesCmd.ErrCh()
 
 	s := semgroup.NewGroup(context.Background(), 4)
 
-	for gitdiffFile := range gitdiffFiles {
-		gitdiffFile := gitdiffFile
-
-		// skip binary files
-		if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
-			continue
-		}
+	// loop to range over both DiffFiles (stdout) and ErrCh (stderr)
+	for diffFilesCh != nil || errCh != nil {
+		select {
+		case gitdiffFile, open := <-diffFilesCh:
+			if !open {
+				diffFilesCh = nil
+				break
+			}
 
-		// Check if commit is allowed
-		commitSHA := ""
-		if gitdiffFile.PatchHeader != nil {
-			commitSHA = gitdiffFile.PatchHeader.SHA
-			if d.Config.Allowlist.CommitAllowed(gitdiffFile.PatchHeader.SHA) {
+			// skip binary files
+			if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
 				continue
 			}
-		}
-		d.addCommit(commitSHA)
 
-		s.Go(func() error {
-			for _, textFragment := range gitdiffFile.TextFragments {
-				if textFragment == nil {
-					return nil
+			// Check if commit is allowed
+			commitSHA := ""
+			if gitdiffFile.PatchHeader != nil {
+				commitSHA = gitdiffFile.PatchHeader.SHA
+				if d.Config.Allowlist.CommitAllowed(gitdiffFile.PatchHeader.SHA) {
+					continue
 				}
+			}
+			d.addCommit(commitSHA)
 
-				fragment := Fragment{
-					Raw:       textFragment.Raw(gitdiff.OpAdd),
-					CommitSHA: commitSHA,
-					FilePath:  gitdiffFile.NewName,
-				}
+			s.Go(func() error {
+				for _, textFragment := range gitdiffFile.TextFragments {
+					if textFragment == nil {
+						return nil
+					}
 
-				for _, finding := range d.Detect(fragment) {
-					d.addFinding(augmentGitFinding(finding, textFragment, gitdiffFile))
+					fragment := Fragment{
+						Raw:       textFragment.Raw(gitdiff.OpAdd),
+						CommitSHA: commitSHA,
+						FilePath:  gitdiffFile.NewName,
+					}
+
+					for _, finding := range d.Detect(fragment) {
+						d.addFinding(augmentGitFinding(finding, textFragment, gitdiffFile))
+					}
 				}
+				return nil
+			})
+		case err, open := <-errCh:
+			if !open {
+				errCh = nil
+				break
 			}
-			return nil
-		})
+
+			return d.findings, err
+		}
 	}
 
 	if err := s.Wait(); err != nil {
@@ -428,9 +443,6 @@ func (d *Detector) DetectGit(source string, logOpts string, gitScanType GitScanT
 	}
 	log.Info().Msgf("%d commits scanned.", len(d.commitMap))
 	log.Debug().Msg("Note: this number might be smaller than expected due to commits with no additions")
-	if git.ErrEncountered {
-		return d.findings, fmt.Errorf("%s", "git error encountered, see logs")
-	}
 	return d.findings, nil
 }
 

+ 72 - 28
detect/git/git.go

@@ -2,23 +2,30 @@ package git
 
 import (
 	"bufio"
+	"errors"
 	"io"
 	"os/exec"
 	"path/filepath"
 	"regexp"
 	"strings"
-	"time"
 
 	"github.com/gitleaks/go-gitdiff/gitdiff"
 	"github.com/rs/zerolog/log"
 )
 
-var ErrEncountered bool
-
-// GitLog returns a channel of gitdiff.File objects from the
-// git log -p command for the given source.
 var quotedOptPattern = regexp.MustCompile(`^(?:"[^"]+"|'[^']+')$`)
-func GitLog(source string, logOpts string) (<-chan *gitdiff.File, error) {
+
+// DiffFilesCmd helps to work with Git's output.
+type DiffFilesCmd struct {
+	cmd         *exec.Cmd
+	diffFilesCh <-chan *gitdiff.File
+	errCh       <-chan error
+}
+
+// NewGitLogCmd returns `*DiffFilesCmd` with two channels: `<-chan *gitdiff.File` and `<-chan error`.
+// Caller should read everything from channels until receiving a signal about their closure and call
+// the `func (*DiffFilesCmd) Wait()` error in order to release resources.
+func NewGitLogCmd(source string, logOpts string) (*DiffFilesCmd, error) {
 	sourceClean := filepath.Clean(source)
 	var cmd *exec.Cmd
 	if logOpts != "" {
@@ -54,21 +61,29 @@ func GitLog(source string, logOpts string) (<-chan *gitdiff.File, error) {
 	if err != nil {
 		return nil, err
 	}
+	if err := cmd.Start(); err != nil {
+		return nil, err
+	}
 
-	go listenForStdErr(stderr)
+	errCh := make(chan error)
+	go listenForStdErr(stderr, errCh)
 
-	if err := cmd.Start(); err != nil {
+	gitdiffFiles, err := gitdiff.Parse(stdout)
+	if err != nil {
 		return nil, err
 	}
-	// HACK: to avoid https://github.com/zricethezav/gitleaks/issues/722
-	time.Sleep(50 * time.Millisecond)
 
-	return gitdiff.Parse(cmd, stdout)
+	return &DiffFilesCmd{
+		cmd:         cmd,
+		diffFilesCh: gitdiffFiles,
+		errCh:       errCh,
+	}, nil
 }
 
-// GitDiff returns a channel of gitdiff.File objects from
-// the git diff command for the given source.
-func GitDiff(source string, staged bool) (<-chan *gitdiff.File, error) {
+// NewGitDiffCmd returns `*DiffFilesCmd` with two channels: `<-chan *gitdiff.File` and `<-chan error`.
+// Caller should read everything from channels until receiving a signal about their closure and call
+// the `func (*DiffFilesCmd) Wait()` error in order to release resources.
+func NewGitDiffCmd(source string, staged bool) (*DiffFilesCmd, error) {
 	sourceClean := filepath.Clean(source)
 	var cmd *exec.Cmd
 	cmd = exec.Command("git", "-C", sourceClean, "diff", "-U0", ".")
@@ -86,21 +101,50 @@ func GitDiff(source string, staged bool) (<-chan *gitdiff.File, error) {
 	if err != nil {
 		return nil, err
 	}
+	if err := cmd.Start(); err != nil {
+		return nil, err
+	}
 
-	go listenForStdErr(stderr)
+	errCh := make(chan error)
+	go listenForStdErr(stderr, errCh)
 
-	if err := cmd.Start(); err != nil {
+	gitdiffFiles, err := gitdiff.Parse(stdout)
+	if err != nil {
 		return nil, err
 	}
-	// HACK: to avoid https://github.com/zricethezav/gitleaks/issues/722
-	time.Sleep(50 * time.Millisecond)
 
-	return gitdiff.Parse(cmd, stdout)
+	return &DiffFilesCmd{
+		cmd:         cmd,
+		diffFilesCh: gitdiffFiles,
+		errCh:       errCh,
+	}, nil
+}
+
+// DiffFilesCh returns a channel with *gitdiff.File.
+func (c *DiffFilesCmd) DiffFilesCh() <-chan *gitdiff.File {
+	return c.diffFilesCh
+}
+
+// ErrCh returns a channel that could produce an error if there is something in stderr.
+func (c *DiffFilesCmd) ErrCh() <-chan error {
+	return c.errCh
+}
+
+// Wait waits for the command to exit and waits for any copying to
+// stdin or copying from stdout or stderr to complete.
+//
+// Wait also closes underlying stdout and stderr.
+func (c *DiffFilesCmd) Wait() (err error) {
+	return c.cmd.Wait()
 }
 
-// listenForStdErr listens for stderr output from git and prints it to stdout
-// then exits with exit code 1
-func listenForStdErr(stderr io.ReadCloser) {
+// listenForStdErr listens for stderr output from git, prints it to stdout,
+// sends to errCh and closes it.
+func listenForStdErr(stderr io.ReadCloser, errCh chan<- error) {
+	defer close(errCh)
+
+	var errEncountered bool
+
 	scanner := bufio.NewScanner(stderr)
 	for scanner.Scan() {
 		// if git throws one of the following errors:
@@ -126,12 +170,12 @@ func listenForStdErr(stderr io.ReadCloser) {
 			log.Warn().Msg(scanner.Text())
 		} else {
 			log.Error().Msgf("[git] %s", scanner.Text())
-
-			// asynchronously set this error flag to true so that we can
-			// capture a log message and exit with a non-zero exit code
-			// This value should get set before the `git` command exits so it's
-			// safe-ish, although I know I know, bad practice.
-			ErrEncountered = true
+			errEncountered = true
 		}
 	}
+
+	if errEncountered {
+		errCh <- errors.New("stderr is not empty")
+		return
+	}
 }

+ 1 - 1
go.mod

@@ -5,7 +5,7 @@ go 1.19
 require (
 	github.com/charmbracelet/lipgloss v0.5.0
 	github.com/fatih/semgroup v1.2.0
-	github.com/gitleaks/go-gitdiff v0.8.0
+	github.com/gitleaks/go-gitdiff v0.9.0
 	github.com/h2non/filetype v1.1.3
 	github.com/rs/zerolog v1.26.1
 	github.com/spf13/cobra v1.2.1

+ 2 - 7
go.sum

@@ -76,8 +76,8 @@ github.com/fatih/semgroup v1.2.0/go.mod h1:1KAD4iIYfXjE4U13B48VM4z9QUwV5Tt8O4rS8
 github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
 github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
-github.com/gitleaks/go-gitdiff v0.8.0 h1:7aExTZm+K/M/EQKOyYcub8rIAdWK6ONxPGuRzxmWW+0=
-github.com/gitleaks/go-gitdiff v0.8.0/go.mod h1:pKz0X4YzCKZs30BL+weqBIG7mx0jl4tF1uXV9ZyNvrA=
+github.com/gitleaks/go-gitdiff v0.9.0 h1:SHAU2l0ZBEo8g82EeFewhVy81sb7JCxW76oSPtR/Nqg=
+github.com/gitleaks/go-gitdiff v0.9.0/go.mod h1:pKz0X4YzCKZs30BL+weqBIG7mx0jl4tF1uXV9ZyNvrA=
 github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
@@ -195,12 +195,10 @@ github.com/magiconair/properties v1.8.5 h1:b6kJs+EmPFMYGkow9GiUyCyOvIwYetYJ3fSaW
 github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60=
 github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
 github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
-github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=
 github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
 github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
 github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
 github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
-github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
 github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
 github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
 github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
@@ -219,7 +217,6 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN
 github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
 github.com/muesli/reflow v0.2.1-0.20210115123740-9e1d0d53df68 h1:y1p/ycavWjGT9FnmSjdbWUlLGvcxrY0Rw3ATltrxOhk=
 github.com/muesli/reflow v0.2.1-0.20210115123740-9e1d0d53df68/go.mod h1:Xk+z4oIWdQqJzsxyjgl3P22oYZnHdZ8FFTHAQQt5BMQ=
-github.com/muesli/termenv v0.11.1-0.20220204035834-5ac8409525e0 h1:STjmj0uFfRryL9fzRA/OupNppeAID6QJYPMavTL7jtY=
 github.com/muesli/termenv v0.11.1-0.20220204035834-5ac8409525e0/go.mod h1:Bd5NYQ7pd+SrtBSrSNoBBmXlcY8+Xj4BMJgh8qcZrvs=
 github.com/muesli/termenv v0.15.1 h1:UzuTb/+hhlBugQz28rpzey4ZuKcZ03MeKsoG7IJZIxs=
 github.com/muesli/termenv v0.15.1/go.mod h1:HeAQPTzpfs016yGtA4g00CsdYnVLJvxsS4ANqrZs2sQ=
@@ -440,8 +437,6 @@ golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7w
 golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20211110154304-99a53858aa08 h1:WecRHqgE09JBkh/584XIE6PMz5KKE/vER4izNUi30AQ=
-golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=