소스 검색

Small refactor `detect` and `sources` (#1297)

Zachary Rice 2 년 전
부모
커밋
ca7aa14542
11개의 파일 변경, 359줄 추가, 346줄 삭제
  1. 11 3
      cmd/detect.go
  2. 5 12
      cmd/protect.go
  3. 32 0
      detect/baseline.go
  4. 48 316
      detect/detect.go
  5. 13 4
      detect/detect_test.go
  6. 71 0
      detect/filesystem.go
  7. 73 0
      detect/git.go
  8. 38 0
      detect/reader.go
  9. 57 0
      sources/filesystem.go
  10. 10 10
      sources/git.go
  11. 1 1
      sources/git_test.go

+ 11 - 3
cmd/detect.go

@@ -7,8 +7,8 @@ import (
 	"github.com/rs/zerolog/log"
 	"github.com/spf13/cobra"
 
-	"github.com/zricethezav/gitleaks/v8/detect"
 	"github.com/zricethezav/gitleaks/v8/report"
+	"github.com/zricethezav/gitleaks/v8/sources"
 )
 
 func init() {
@@ -63,7 +63,11 @@ func runDetect(cmd *cobra.Command, args []string) {
 
 	// start the detector scan
 	if noGit {
-		findings, err = detector.DetectFiles(source)
+		paths, err := sources.FilesystemTargets(source, detector.Sema, detector.FollowSymlinks)
+		if err != nil {
+			log.Fatal().Err(err).Msg("") // zerolog emits the event (and exits on Fatal) only when Msg is called
+		}
+		findings, err = detector.DetectFiles(paths)
 		if err != nil {
 			// don't exit on error, just log it
 			log.Error().Err(err).Msg("")
@@ -81,7 +85,11 @@ func runDetect(cmd *cobra.Command, args []string) {
 		if err != nil {
 			log.Fatal().Err(err).Msg("")
 		}
-		findings, err = detector.DetectGit(source, logOpts, detect.DetectType)
+		gitCmd, err := sources.NewGitLogCmd(source, logOpts)
+		if err != nil {
+			log.Fatal().Err(err).Msg("")
+		}
+		findings, err = detector.DetectGit(gitCmd)
 		if err != nil {
 			// don't exit on error, just log it
 			log.Error().Err(err).Msg("")

+ 5 - 12
cmd/protect.go

@@ -6,8 +6,8 @@ import (
 	"github.com/rs/zerolog/log"
 	"github.com/spf13/cobra"
 
-	"github.com/zricethezav/gitleaks/v8/detect"
 	"github.com/zricethezav/gitleaks/v8/report"
+	"github.com/zricethezav/gitleaks/v8/sources"
 )
 
 func init() {
@@ -35,22 +35,15 @@ func runProtect(cmd *cobra.Command, args []string) {
 		log.Fatal().Err(err).Msg("")
 	}
 	start := time.Now()
-
 	detector := Detector(cmd, cfg, source)
 
-	// get log options for git scan
-	logOpts, err := cmd.Flags().GetString("log-opts")
-	if err != nil {
-		log.Fatal().Err(err).Msg("")
-	}
-
 	// start git scan
 	var findings []report.Finding
-	if staged {
-		findings, err = detector.DetectGit(source, logOpts, detect.ProtectStagedType)
-	} else {
-		findings, err = detector.DetectGit(source, logOpts, detect.ProtectType)
+	gitCmd, err := sources.NewGitDiffCmd(source, staged)
+	if err != nil {
+		log.Fatal().Err(err).Msg("")
 	}
+	findings, err = detector.DetectGit(gitCmd)
 
 	findingSummaryAndExit(findings, cmd, cfg, exitCode, start, err)
 }

+ 32 - 0
detect/baseline.go

@@ -4,6 +4,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"os"
+	"path/filepath"
 
 	"github.com/zricethezav/gitleaks/v8/report"
 )
@@ -49,3 +50,34 @@ func LoadBaseline(baselinePath string) ([]report.Finding, error) {
 
 	return previousFindings, nil
 }
+
+func (d *Detector) AddBaseline(baselinePath string, source string) error {
+	if baselinePath != "" {
+		absoluteSource, err := filepath.Abs(source)
+		if err != nil {
+			return err
+		}
+
+		absoluteBaseline, err := filepath.Abs(baselinePath)
+		if err != nil {
+			return err
+		}
+
+		relativeBaseline, err := filepath.Rel(absoluteSource, absoluteBaseline)
+		if err != nil {
+			return err
+		}
+
+		baseline, err := LoadBaseline(baselinePath)
+		if err != nil {
+			return err
+		}
+
+		d.baseline = baseline
+		baselinePath = relativeBaseline
+
+	}
+
+	d.baselinePath = baselinePath
+	return nil
+}

+ 48 - 316
detect/detect.go

@@ -4,38 +4,22 @@ import (
 	"bufio"
 	"context"
 	"fmt"
-	"io"
-	"io/fs"
 	"os"
-	"path/filepath"
 	"regexp"
 	"strings"
 	"sync"
 
-	"github.com/h2non/filetype"
 	"github.com/zricethezav/gitleaks/v8/config"
-	"github.com/zricethezav/gitleaks/v8/detect/git"
 	"github.com/zricethezav/gitleaks/v8/report"
 
 	ahocorasick "github.com/BobuSumisu/aho-corasick"
 	"github.com/fatih/semgroup"
-	"github.com/gitleaks/go-gitdiff/gitdiff"
 
 	"github.com/rs/zerolog/log"
 	"github.com/spf13/viper"
 )
 
-// Type used to differentiate between git scan types:
-// $ gitleaks detect
-// $ gitleaks protect
-// $ gitleaks protect staged
-type GitScanType int
-
 const (
-	DetectType GitScanType = iota
-	ProtectType
-	ProtectStagedType
-
 	gitleaksAllowSignature = "gitleaks:allow"
 	chunkSize              = 10 * 1_000 // 10kb
 )
@@ -90,6 +74,9 @@ type Detector struct {
 
 	// gitleaksIgnore
 	gitleaksIgnore map[string]bool
+
+	// Sema (https://github.com/fatih/semgroup) controls the concurrency
+	Sema *semgroup.Group
 }
 
 // Fragment contains the data to be scanned
@@ -122,6 +109,7 @@ func NewDetector(cfg config.Config) *Detector {
 		findings:       make([]report.Finding, 0),
 		Config:         cfg,
 		prefilter:      *ahocorasick.NewTrieBuilder().AddStrings(cfg.Keywords).Build(),
+		Sema:           semgroup.NewGroup(context.Background(), 40),
 	}
 }
 
@@ -166,37 +154,6 @@ func (d *Detector) AddGitleaksIgnore(gitleaksIgnorePath string) error {
 	return nil
 }
 
-func (d *Detector) AddBaseline(baselinePath string, source string) error {
-	if baselinePath != "" {
-		absoluteSource, err := filepath.Abs(source)
-		if err != nil {
-			return err
-		}
-
-		absoluteBaseline, err := filepath.Abs(baselinePath)
-		if err != nil {
-			return err
-		}
-
-		relativeBaseline, err := filepath.Rel(absoluteSource, absoluteBaseline)
-		if err != nil {
-			return err
-		}
-
-		baseline, err := LoadBaseline(baselinePath)
-		if err != nil {
-			return err
-		}
-
-		d.baseline = baseline
-		baselinePath = relativeBaseline
-
-	}
-
-	d.baselinePath = baselinePath
-	return nil
-}
-
 // DetectBytes scans the given bytes and returns a list of findings
 func (d *Detector) DetectBytes(content []byte) []report.Finding {
 	return d.DetectString(string(content))
@@ -209,6 +166,50 @@ func (d *Detector) DetectString(content string) []report.Finding {
 	})
 }
 
+// Detect scans the given fragment and returns a list of findings
+func (d *Detector) Detect(fragment Fragment) []report.Finding {
+	var findings []report.Finding
+
+	// initiate fragment keywords
+	fragment.keywords = make(map[string]bool)
+
+	// check if filepath is allowed
+	if fragment.FilePath != "" && (d.Config.Allowlist.PathAllowed(fragment.FilePath) ||
+		fragment.FilePath == d.Config.Path || (d.baselinePath != "" && fragment.FilePath == d.baselinePath)) {
+		return findings
+	}
+
+	// add newline indices for location calculation in detectRule
+	fragment.newlineIndices = regexp.MustCompile("\n").FindAllStringIndex(fragment.Raw, -1)
+
+	// build keyword map for prefiltering rules
+	normalizedRaw := strings.ToLower(fragment.Raw)
+	matches := d.prefilter.MatchString(normalizedRaw)
+	for _, m := range matches {
+		fragment.keywords[normalizedRaw[m.Pos():int(m.Pos())+len(m.Match())]] = true
+	}
+
+	for _, rule := range d.Config.Rules {
+		if len(rule.Keywords) == 0 {
+			// if no keywords are associated with the rule, always scan the
+			// fragment using the rule
+			findings = append(findings, d.detectRule(fragment, rule)...)
+			continue
+		}
+		fragmentContainsKeyword := false
+		// check if keywords are in the fragment
+		for _, k := range rule.Keywords {
+			if _, ok := fragment.keywords[strings.ToLower(k)]; ok {
+				fragmentContainsKeyword = true
+			}
+		}
+		if fragmentContainsKeyword {
+			findings = append(findings, d.detectRule(fragment, rule)...)
+		}
+	}
+	return filter(findings, d.Redact)
+}
+
 // detectRule scans the given fragment for the given rule and returns a list of findings
 func (d *Detector) detectRule(fragment Fragment, rule config.Rule) []report.Finding {
 	var findings []report.Finding
@@ -364,275 +365,6 @@ func (d *Detector) detectRule(fragment Fragment, rule config.Rule) []report.Find
 	return findings
 }
 
-// DetectGit accepts source directory, log opts and GitScanType and returns a slice of report.Finding.
-func (d *Detector) DetectGit(source string, logOpts string, gitScanType GitScanType) ([]report.Finding, error) {
-	var (
-		diffFilesCmd *git.DiffFilesCmd
-		err          error
-	)
-	switch gitScanType {
-	case DetectType:
-		diffFilesCmd, err = git.NewGitLogCmd(source, logOpts)
-		if err != nil {
-			return d.findings, err
-		}
-	case ProtectType:
-		diffFilesCmd, err = git.NewGitDiffCmd(source, false)
-		if err != nil {
-			return d.findings, err
-		}
-	case ProtectStagedType:
-		diffFilesCmd, err = git.NewGitDiffCmd(source, true)
-		if err != nil {
-			return d.findings, err
-		}
-	}
-	defer diffFilesCmd.Wait()
-	diffFilesCh := diffFilesCmd.DiffFilesCh()
-	errCh := diffFilesCmd.ErrCh()
-
-	s := semgroup.NewGroup(context.Background(), 4)
-
-	// loop to range over both DiffFiles (stdout) and ErrCh (stderr)
-	for diffFilesCh != nil || errCh != nil {
-		select {
-		case gitdiffFile, open := <-diffFilesCh:
-			if !open {
-				diffFilesCh = nil
-				break
-			}
-
-			// skip binary files
-			if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
-				continue
-			}
-
-			// Check if commit is allowed
-			commitSHA := ""
-			if gitdiffFile.PatchHeader != nil {
-				commitSHA = gitdiffFile.PatchHeader.SHA
-				if d.Config.Allowlist.CommitAllowed(gitdiffFile.PatchHeader.SHA) {
-					continue
-				}
-			}
-			d.addCommit(commitSHA)
-
-			s.Go(func() error {
-				for _, textFragment := range gitdiffFile.TextFragments {
-					if textFragment == nil {
-						return nil
-					}
-
-					fragment := Fragment{
-						Raw:       textFragment.Raw(gitdiff.OpAdd),
-						CommitSHA: commitSHA,
-						FilePath:  gitdiffFile.NewName,
-					}
-
-					for _, finding := range d.Detect(fragment) {
-						d.addFinding(augmentGitFinding(finding, textFragment, gitdiffFile))
-					}
-				}
-				return nil
-			})
-		case err, open := <-errCh:
-			if !open {
-				errCh = nil
-				break
-			}
-
-			return d.findings, err
-		}
-	}
-
-	if err := s.Wait(); err != nil {
-		return d.findings, err
-	}
-	log.Info().Msgf("%d commits scanned.", len(d.commitMap))
-	log.Debug().Msg("Note: this number might be smaller than expected due to commits with no additions")
-	return d.findings, nil
-}
-
-type scanTarget struct {
-	Path    string
-	Symlink string
-}
-
-// DetectFiles accepts a path to a source directory or file and begins a scan of the
-// file or directory.
-func (d *Detector) DetectFiles(source string) ([]report.Finding, error) {
-	s := semgroup.NewGroup(context.Background(), 4)
-	paths := make(chan scanTarget)
-	s.Go(func() error {
-		defer close(paths)
-		return filepath.Walk(source,
-			func(path string, fInfo os.FileInfo, err error) error {
-				if err != nil {
-					return err
-				}
-				if fInfo.Name() == ".git" && fInfo.IsDir() {
-					return filepath.SkipDir
-				}
-				if fInfo.Size() == 0 {
-					return nil
-				}
-				if fInfo.Mode().IsRegular() {
-					paths <- scanTarget{
-						Path:    path,
-						Symlink: "",
-					}
-				}
-				if fInfo.Mode().Type() == fs.ModeSymlink && d.FollowSymlinks {
-					realPath, err := filepath.EvalSymlinks(path)
-					if err != nil {
-						return err
-					}
-					realPathFileInfo, _ := os.Stat(realPath)
-					if realPathFileInfo.IsDir() {
-						log.Debug().Msgf("found symlinked directory: %s -> %s [skipping]", path, realPath)
-						return nil
-					}
-					paths <- scanTarget{
-						Path:    realPath,
-						Symlink: path,
-					}
-				}
-				return nil
-			})
-	})
-	for pa := range paths {
-		p := pa
-		s.Go(func() error {
-			f, err := os.Open(p.Path)
-			if err != nil {
-				return err
-			}
-			defer f.Close()
-
-			// Buffer to hold file chunks
-			buf := make([]byte, chunkSize)
-			totalLines := 0
-			for {
-				n, err := f.Read(buf)
-				if err != nil && err != io.EOF {
-					return err
-				}
-				if n == 0 {
-					break
-				}
-
-				// TODO: optimization could be introduced here
-				mimetype, err := filetype.Match(buf[:n])
-				if err != nil {
-					return err
-				}
-				if mimetype.MIME.Type == "application" {
-					return nil // skip binary files
-				}
-
-				// Count the number of newlines in this chunk
-				linesInChunk := strings.Count(string(buf[:n]), "\n")
-				totalLines += linesInChunk
-				fragment := Fragment{
-					Raw:      string(buf[:n]),
-					FilePath: p.Path,
-				}
-				if p.Symlink != "" {
-					fragment.SymlinkFile = p.Symlink
-				}
-				for _, finding := range d.Detect(fragment) {
-					// need to add 1 since line counting starts at 1
-					finding.StartLine += (totalLines - linesInChunk) + 1
-					finding.EndLine += (totalLines - linesInChunk) + 1
-					d.addFinding(finding)
-				}
-			}
-
-			return nil
-		})
-	}
-
-	if err := s.Wait(); err != nil {
-		return d.findings, err
-	}
-
-	return d.findings, nil
-}
-
-// DetectReader accepts an io.Reader and a buffer size for the reader in KB
-func (d *Detector) DetectReader(r io.Reader, bufSize int) ([]report.Finding, error) {
-	reader := bufio.NewReader(r)
-	buf := make([]byte, 0, 1000*bufSize)
-	findings := []report.Finding{}
-
-	for {
-		n, err := reader.Read(buf[:cap(buf)])
-		buf = buf[:n]
-		if err != nil {
-			if err != io.EOF {
-				return findings, err
-			}
-			break
-		}
-
-		fragment := Fragment{
-			Raw: string(buf),
-		}
-		for _, finding := range d.Detect(fragment) {
-			findings = append(findings, finding)
-			if d.Verbose {
-				printFinding(finding, d.NoColor)
-			}
-		}
-	}
-
-	return findings, nil
-}
-
-// Detect scans the given fragment and returns a list of findings
-func (d *Detector) Detect(fragment Fragment) []report.Finding {
-	var findings []report.Finding
-
-	// initiate fragment keywords
-	fragment.keywords = make(map[string]bool)
-
-	// check if filepath is allowed
-	if fragment.FilePath != "" && (d.Config.Allowlist.PathAllowed(fragment.FilePath) ||
-		fragment.FilePath == d.Config.Path || (d.baselinePath != "" && fragment.FilePath == d.baselinePath)) {
-		return findings
-	}
-
-	// add newline indices for location calculation in detectRule
-	fragment.newlineIndices = regexp.MustCompile("\n").FindAllStringIndex(fragment.Raw, -1)
-
-	// build keyword map for prefiltering rules
-	normalizedRaw := strings.ToLower(fragment.Raw)
-	matches := d.prefilter.MatchString(normalizedRaw)
-	for _, m := range matches {
-		fragment.keywords[normalizedRaw[m.Pos():int(m.Pos())+len(m.Match())]] = true
-	}
-
-	for _, rule := range d.Config.Rules {
-		if len(rule.Keywords) == 0 {
-			// if not keywords are associated with the rule always scan the
-			// fragment using the rule
-			findings = append(findings, d.detectRule(fragment, rule)...)
-			continue
-		}
-		fragmentContainsKeyword := false
-		// check if keywords are in the fragment
-		for _, k := range rule.Keywords {
-			if _, ok := fragment.keywords[strings.ToLower(k)]; ok {
-				fragmentContainsKeyword = true
-			}
-		}
-		if fragmentContainsKeyword {
-			findings = append(findings, d.detectRule(fragment, rule)...)
-		}
-	}
-	return filter(findings, d.Redact)
-}
-
 // addFinding synchronously adds a finding to the findings slice
 func (d *Detector) addFinding(finding report.Finding) {
 	globalFingerprint := fmt.Sprintf("%s:%s:%d", finding.File, finding.RuleID, finding.StartLine)

+ 13 - 4
detect/detect_test.go

@@ -12,6 +12,7 @@ import (
 
 	"github.com/zricethezav/gitleaks/v8/config"
 	"github.com/zricethezav/gitleaks/v8/report"
+	"github.com/zricethezav/gitleaks/v8/sources"
 )
 
 const configPath = "../testdata/config/"
@@ -466,7 +467,9 @@ func TestFromGit(t *testing.T) {
 		err = detector.AddGitleaksIgnore(ignorePath)
 		require.NoError(t, err)
 
-		findings, err := detector.DetectGit(tt.source, tt.logOpts, DetectType)
+		gitCmd, err := sources.NewGitLogCmd(tt.source, tt.logOpts)
+		require.NoError(t, err)
+		findings, err := detector.DetectGit(gitCmd)
 		require.NoError(t, err)
 
 		for _, f := range findings {
@@ -533,7 +536,9 @@ func TestFromGitStaged(t *testing.T) {
 		detector := NewDetector(cfg)
 		err = detector.AddGitleaksIgnore(filepath.Join(tt.source, ".gitleaksignore"))
 		require.NoError(t, err)
-		findings, err := detector.DetectGit(tt.source, tt.logOpts, ProtectStagedType)
+		gitCmd, err := sources.NewGitDiffCmd(tt.source, true)
+		require.NoError(t, err)
+		findings, err := detector.DetectGit(gitCmd)
 		require.NoError(t, err)
 
 		for _, f := range findings {
@@ -625,7 +630,9 @@ func TestFromFiles(t *testing.T) {
 		err = detector.AddGitleaksIgnore(ignorePath)
 		require.NoError(t, err)
 		detector.FollowSymlinks = true
-		findings, err := detector.DetectFiles(tt.source)
+		paths, err := sources.FilesystemTargets(tt.source, detector.Sema, true)
+		require.NoError(t, err)
+		findings, err := detector.DetectFiles(paths)
 		require.NoError(t, err)
 		assert.ElementsMatch(t, tt.expectedFindings, findings)
 	}
@@ -674,7 +681,9 @@ func TestDetectWithSymlinks(t *testing.T) {
 		cfg, _ := vc.Translate()
 		detector := NewDetector(cfg)
 		detector.FollowSymlinks = true
-		findings, err := detector.DetectFiles(tt.source)
+		paths, err := sources.FilesystemTargets(tt.source, detector.Sema, true)
+		require.NoError(t, err)
+		findings, err := detector.DetectFiles(paths)
 		require.NoError(t, err)
 		assert.ElementsMatch(t, tt.expectedFindings, findings)
 	}

+ 71 - 0
detect/filesystem.go

@@ -0,0 +1,71 @@
+package detect
+
+import (
+	"io"
+	"os"
+	"strings"
+
+	"github.com/h2non/filetype"
+	"github.com/zricethezav/gitleaks/v8/report"
+	"github.com/zricethezav/gitleaks/v8/sources"
+)
+
+func (d *Detector) DetectFiles(paths <-chan sources.ScanTarget) ([]report.Finding, error) {
+	for pa := range paths {
+		p := pa
+		d.Sema.Go(func() error {
+			f, err := os.Open(p.Path)
+			if err != nil {
+				return err
+			}
+			defer f.Close()
+
+			// Buffer to hold file chunks
+			buf := make([]byte, chunkSize)
+			totalLines := 0
+			for {
+				n, err := f.Read(buf)
+				if err != nil && err != io.EOF {
+					return err
+				}
+				if n == 0 {
+					break
+				}
+
+				// TODO: optimization could be introduced here
+				mimetype, err := filetype.Match(buf[:n])
+				if err != nil {
+					return err
+				}
+				if mimetype.MIME.Type == "application" {
+					return nil // skip binary files
+				}
+
+				// Count the number of newlines in this chunk
+				linesInChunk := strings.Count(string(buf[:n]), "\n")
+				totalLines += linesInChunk
+				fragment := Fragment{
+					Raw:      string(buf[:n]),
+					FilePath: p.Path,
+				}
+				if p.Symlink != "" {
+					fragment.SymlinkFile = p.Symlink
+				}
+				for _, finding := range d.Detect(fragment) {
+					// need to add 1 since line counting starts at 1
+					finding.StartLine += (totalLines - linesInChunk) + 1
+					finding.EndLine += (totalLines - linesInChunk) + 1
+					d.addFinding(finding)
+				}
+			}
+
+			return nil
+		})
+	}
+
+	if err := d.Sema.Wait(); err != nil {
+		return d.findings, err
+	}
+
+	return d.findings, nil
+}

+ 73 - 0
detect/git.go

@@ -0,0 +1,73 @@
+package detect
+
+import (
+	"github.com/gitleaks/go-gitdiff/gitdiff"
+	"github.com/rs/zerolog/log"
+	"github.com/zricethezav/gitleaks/v8/report"
+	"github.com/zricethezav/gitleaks/v8/sources"
+)
+
+func (d *Detector) DetectGit(gitCmd *sources.GitCmd) ([]report.Finding, error) {
+	defer gitCmd.Wait()
+	diffFilesCh := gitCmd.DiffFilesCh()
+	errCh := gitCmd.ErrCh()
+
+	// loop to range over both DiffFiles (stdout) and ErrCh (stderr)
+	for diffFilesCh != nil || errCh != nil {
+		select {
+		case gitdiffFile, open := <-diffFilesCh:
+			if !open {
+				diffFilesCh = nil
+				break
+			}
+
+			// skip binary files
+			if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
+				continue
+			}
+
+			// Check if commit is allowed
+			commitSHA := ""
+			if gitdiffFile.PatchHeader != nil {
+				commitSHA = gitdiffFile.PatchHeader.SHA
+				if d.Config.Allowlist.CommitAllowed(gitdiffFile.PatchHeader.SHA) {
+					continue
+				}
+			}
+			d.addCommit(commitSHA)
+
+			d.Sema.Go(func() error {
+				for _, textFragment := range gitdiffFile.TextFragments {
+					if textFragment == nil {
+						return nil
+					}
+
+					fragment := Fragment{
+						Raw:       textFragment.Raw(gitdiff.OpAdd),
+						CommitSHA: commitSHA,
+						FilePath:  gitdiffFile.NewName,
+					}
+
+					for _, finding := range d.Detect(fragment) {
+						d.addFinding(augmentGitFinding(finding, textFragment, gitdiffFile))
+					}
+				}
+				return nil
+			})
+		case err, open := <-errCh:
+			if !open {
+				errCh = nil
+				break
+			}
+
+			return d.findings, err
+		}
+	}
+
+	if err := d.Sema.Wait(); err != nil {
+		return d.findings, err
+	}
+	log.Info().Msgf("%d commits scanned.", len(d.commitMap))
+	log.Debug().Msg("Note: this number might be smaller than expected due to commits with no additions")
+	return d.findings, nil
+}

+ 38 - 0
detect/reader.go

@@ -0,0 +1,38 @@
+package detect
+
+import (
+	"bufio"
+	"io"
+
+	"github.com/zricethezav/gitleaks/v8/report"
+)
+
+// DetectReader accepts an io.Reader and a buffer size for the reader in KB
+func (d *Detector) DetectReader(r io.Reader, bufSize int) ([]report.Finding, error) {
+	reader := bufio.NewReader(r)
+	buf := make([]byte, 0, 1000*bufSize)
+	findings := []report.Finding{}
+
+	for {
+		n, err := reader.Read(buf[:cap(buf)])
+		buf = buf[:n]
+		if err != nil {
+			if err != io.EOF {
+				return findings, err
+			}
+			break
+		}
+
+		fragment := Fragment{
+			Raw: string(buf),
+		}
+		for _, finding := range d.Detect(fragment) {
+			findings = append(findings, finding)
+			if d.Verbose {
+				printFinding(finding, d.NoColor)
+			}
+		}
+	}
+
+	return findings, nil
+}

+ 57 - 0
sources/filesystem.go

@@ -0,0 +1,57 @@
+package sources
+
+import (
+	"io/fs"
+	"os"
+	"path/filepath"
+
+	"github.com/fatih/semgroup"
+	"github.com/rs/zerolog/log"
+)
+
+type ScanTarget struct {
+	Path    string
+	Symlink string
+}
+
+func FilesystemTargets(source string, s *semgroup.Group, followSymlinks bool) (<-chan ScanTarget, error) {
+	paths := make(chan ScanTarget)
+	s.Go(func() error {
+		defer close(paths)
+		return filepath.Walk(source,
+			func(path string, fInfo os.FileInfo, err error) error {
+				if err != nil {
+					return err
+				}
+				if fInfo.Name() == ".git" && fInfo.IsDir() {
+					return filepath.SkipDir
+				}
+				if fInfo.Size() == 0 {
+					return nil
+				}
+				if fInfo.Mode().IsRegular() {
+					paths <- ScanTarget{
+						Path:    path,
+						Symlink: "",
+					}
+				}
+				if fInfo.Mode().Type() == fs.ModeSymlink && followSymlinks {
+					realPath, err := filepath.EvalSymlinks(path)
+					if err != nil {
+						return err
+					}
+					realPathFileInfo, _ := os.Stat(realPath)
+					if realPathFileInfo.IsDir() {
+						log.Debug().Msgf("found symlinked directory: %s -> %s [skipping]", path, realPath)
+						return nil
+					}
+					paths <- ScanTarget{
+						Path:    realPath,
+						Symlink: path,
+					}
+				}
+				return nil
+			})
+	})
+	return paths, nil
+}

+ 10 - 10
detect/git/git.go → sources/git.go

@@ -1,4 +1,4 @@
-package git
+package sources
 
 import (
 	"bufio"
@@ -15,8 +15,8 @@ import (
 
 var quotedOptPattern = regexp.MustCompile(`^(?:"[^"]+"|'[^']+')$`)
 
-// DiffFilesCmd helps to work with Git's output.
-type DiffFilesCmd struct {
+// GitCmd helps to work with Git's output.
+type GitCmd struct {
 	cmd         *exec.Cmd
 	diffFilesCh <-chan *gitdiff.File
 	errCh       <-chan error
@@ -25,7 +25,7 @@ type DiffFilesCmd struct {
 // NewGitLogCmd returns `*GitCmd` with two channels: `<-chan *gitdiff.File` and `<-chan error`.
 // Caller should read everything from channels until receiving a signal about their closure and call
 // `func (*GitCmd) Wait() error` in order to release resources.
-func NewGitLogCmd(source string, logOpts string) (*DiffFilesCmd, error) {
+func NewGitLogCmd(source string, logOpts string) (*GitCmd, error) {
 	sourceClean := filepath.Clean(source)
 	var cmd *exec.Cmd
 	if logOpts != "" {
@@ -73,7 +73,7 @@ func NewGitLogCmd(source string, logOpts string) (*DiffFilesCmd, error) {
 		return nil, err
 	}
 
-	return &DiffFilesCmd{
+	return &GitCmd{
 		cmd:         cmd,
 		diffFilesCh: gitdiffFiles,
 		errCh:       errCh,
@@ -83,7 +83,7 @@ func NewGitLogCmd(source string, logOpts string) (*DiffFilesCmd, error) {
 // NewGitDiffCmd returns `*GitCmd` with two channels: `<-chan *gitdiff.File` and `<-chan error`.
 // Caller should read everything from channels until receiving a signal about their closure and call
 // `func (*GitCmd) Wait() error` in order to release resources.
-func NewGitDiffCmd(source string, staged bool) (*DiffFilesCmd, error) {
+func NewGitDiffCmd(source string, staged bool) (*GitCmd, error) {
 	sourceClean := filepath.Clean(source)
 	var cmd *exec.Cmd
 	cmd = exec.Command("git", "-C", sourceClean, "diff", "-U0", ".")
@@ -113,7 +113,7 @@ func NewGitDiffCmd(source string, staged bool) (*DiffFilesCmd, error) {
 		return nil, err
 	}
 
-	return &DiffFilesCmd{
+	return &GitCmd{
 		cmd:         cmd,
 		diffFilesCh: gitdiffFiles,
 		errCh:       errCh,
@@ -121,12 +121,12 @@ func NewGitDiffCmd(source string, staged bool) (*DiffFilesCmd, error) {
 }
 
 // DiffFilesCh returns a channel with *gitdiff.File.
-func (c *DiffFilesCmd) DiffFilesCh() <-chan *gitdiff.File {
+func (c *GitCmd) DiffFilesCh() <-chan *gitdiff.File {
 	return c.diffFilesCh
 }
 
 // ErrCh returns a channel that could produce an error if there is something in stderr.
-func (c *DiffFilesCmd) ErrCh() <-chan error {
+func (c *GitCmd) ErrCh() <-chan error {
 	return c.errCh
 }
 
@@ -134,7 +134,7 @@ func (c *DiffFilesCmd) ErrCh() <-chan error {
 // stdin or copying from stdout or stderr to complete.
 //
 // Wait also closes underlying stdout and stderr.
-func (c *DiffFilesCmd) Wait() (err error) {
+func (c *GitCmd) Wait() (err error) {
 	return c.cmd.Wait()
 }
 

+ 1 - 1
detect/git/git_test.go → sources/git_test.go

@@ -1,4 +1,4 @@
-package git_test
+package sources
 
 // TODO: commenting out this test for now because it's flaky. Alternatives to consider to get this working:
 // -- use `git stash` instead of `restore()`