
Refactor `detect`, add `entropy` to all findings (#804)

Zachary Rice 3 years ago
Parent
Commit
6e72472b60
24 changed files with 1086 additions and 913 deletions
  1. cmd/detect.go (+62 -27)
  2. cmd/protect.go (+47 -14)
  3. cmd/root.go (+11 -4)
  4. config/allowlist.go (+17 -11)
  5. config/config_test.go (+4 -51)
  6. config/rule.go (+26 -33)
  7. config/utils.go (+0 -36)
  8. detect/detect.go (+325 -97)
  9. detect/detect_test.go (+297 -44)
  10. detect/files.go (+0 -77)
  11. detect/files_test.go (+0 -80)
  12. detect/git.go (+0 -95)
  13. detect/git/git.go (+0 -0)
  14. detect/git/git_test.go (+158 -0)
  15. detect/git_test.go (+0 -160)
  16. detect/location.go (+5 -2)
  17. detect/location_test.go (+1 -1)
  18. detect/utils.go (+107 -0)
  19. git/git_test.go (+0 -157)
  20. go.mod (+4 -4)
  21. go.sum (+13 -6)
  22. report/finding.go (+0 -8)
  23. report/report.go (+4 -5)
  24. report/sarif_test.go (+5 -1)

+ 62 - 27
cmd/detect.go

@@ -11,7 +11,6 @@ import (
 
 	"github.com/zricethezav/gitleaks/v8/config"
 	"github.com/zricethezav/gitleaks/v8/detect"
-	"github.com/zricethezav/gitleaks/v8/git"
 	"github.com/zricethezav/gitleaks/v8/report"
 )
 
@@ -35,56 +34,92 @@ func runDetect(cmd *cobra.Command, args []string) {
 		err      error
 	)
 
-	viper.Unmarshal(&vc)
+	// Load config
+	if err = viper.Unmarshal(&vc); err != nil {
+		log.Fatal().Err(err).Msg("Failed to load config")
+	}
 	cfg, err := vc.Translate()
 	if err != nil {
 		log.Fatal().Err(err).Msg("Failed to load config")
 	}
-
 	cfg.Path, _ = cmd.Flags().GetString("config")
-	source, _ := cmd.Flags().GetString("source")
-	logOpts, _ := cmd.Flags().GetString("log-opts")
-	verbose, _ := cmd.Flags().GetBool("verbose")
-	redact, _ := cmd.Flags().GetBool("redact")
-	noGit, _ := cmd.Flags().GetBool("no-git")
-	exitCode, _ := cmd.Flags().GetInt("exit-code")
-	if cfg.Path == "" {
-		cfg.Path = filepath.Join(source, ".gitleaks.toml")
-	}
+
+	// start timer
 	start := time.Now()
 
+	// Setup detector
+	detector := detect.NewDetector(cfg)
+	detector.Config.Path, err = cmd.Flags().GetString("config")
+	if err != nil {
+		log.Fatal().Err(err)
+	}
+	source, err := cmd.Flags().GetString("source")
+	if err != nil {
+		log.Fatal().Err(err)
+	}
+	// if config path is not set, then use the {source}/.gitleaks.toml path.
+	// note that there may not be a `{source}/.gitleaks.toml` file, this is ok.
+	if detector.Config.Path == "" {
+		detector.Config.Path = filepath.Join(source, ".gitleaks.toml")
+	}
+	// set verbose flag
+	if detector.Verbose, err = cmd.Flags().GetBool("verbose"); err != nil {
+		log.Fatal().Err(err)
+	}
+	// set redact flag
+	if detector.Redact, err = cmd.Flags().GetBool("redact"); err != nil {
+		log.Fatal().Err(err)
+	}
+
+	// set exit code
+	exitCode, err := cmd.Flags().GetInt("exit-code")
+	if err != nil {
+		log.Fatal().Err(err)
+	}
+
+	// determine what type of scan:
+	// - git: scan the history of the repo
+	// - no-git: scan files by treating the repo as a plain directory
+	noGit, err := cmd.Flags().GetBool("no-git")
+	if err != nil {
+		log.Fatal().Err(err)
+	}
+
+	// start the detector scan
 	if noGit {
-		if logOpts != "" {
-			log.Fatal().Err(err).Msg("--log-opts cannot be used with --no-git")
-		}
-		findings, err = detect.FromFiles(source, cfg, detect.Options{
-			Verbose: verbose,
-			Redact:  redact,
-		})
+		findings, err = detector.DetectFiles(source)
 		if err != nil {
-			log.Fatal().Err(err).Msg("Failed to scan files")
+			// don't exit on error, just log it
+			log.Error().Err(err)
 		}
+
 	} else {
-		files, err := git.GitLog(source, logOpts)
+		logOpts, err := cmd.Flags().GetString("log-opts")
 		if err != nil {
-			log.Fatal().Err(err).Msg("Failed to get git log")
+			log.Fatal().Err(err)
+		}
+		findings, err = detector.DetectGit(source, logOpts, detect.DetectType)
+		if err != nil {
+			// don't exit on error, just log it
+			log.Error().Err(err)
 		}
-
-		findings = detect.FromGit(files, cfg, detect.Options{Verbose: verbose, Redact: redact})
 	}
 
+	// log info about the scan
+	log.Info().Msgf("scan completed in %s", time.Since(start))
 	if len(findings) != 0 {
 		log.Warn().Msgf("leaks found: %d", len(findings))
 	} else {
 		log.Info().Msg("no leaks found")
 	}
 
-	log.Info().Msgf("scan completed in %s", time.Since(start))
-
+	// write report if desired
 	reportPath, _ := cmd.Flags().GetString("report-path")
 	ext, _ := cmd.Flags().GetString("report-format")
 	if reportPath != "" {
-		report.Write(findings, cfg, ext, reportPath)
+		if err = report.Write(findings, cfg, ext, reportPath); err != nil {
+			log.Fatal().Err(err)
+		}
 	}
 
 	if len(findings) != 0 {
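
For review context, the new flow in `runDetect` boils down to the sketch below when gitleaks is used as a library. Illustrative only: the package name `example` and the helper `scanSource` are made up, error/report handling is omitted; the `detect`, `config`, and `report` calls are the ones introduced in this diff.

// Package example sketches how the refactored detect command maps onto the new Detector API.
package example

import (
	"github.com/zricethezav/gitleaks/v8/config"
	"github.com/zricethezav/gitleaks/v8/detect"
	"github.com/zricethezav/gitleaks/v8/report"
)

// scanSource is a hypothetical helper mirroring runDetect: it wires a translated
// config into a Detector and picks the scan path based on noGit.
func scanSource(cfg config.Config, source string, noGit bool) ([]report.Finding, error) {
	detector := detect.NewDetector(cfg)
	detector.Verbose = true // print findings as they are added
	detector.Redact = false // keep secrets visible in output

	if noGit {
		// scan files, treating source as a plain directory
		return detector.DetectFiles(source)
	}
	// scan the repo's git history; "" means no extra `git log` options
	return detector.DetectGit(source, "", detect.DetectType)
}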

+ 47 - 14
cmd/protect.go

@@ -11,7 +11,6 @@ import (
 
 	"github.com/zricethezav/gitleaks/v8/config"
 	"github.com/zricethezav/gitleaks/v8/detect"
-	"github.com/zricethezav/gitleaks/v8/git"
 	"github.com/zricethezav/gitleaks/v8/report"
 )
 
@@ -30,41 +29,75 @@ func runProtect(cmd *cobra.Command, args []string) {
 	initConfig()
 	var vc config.ViperConfig
 
-	viper.Unmarshal(&vc)
+	if err := viper.Unmarshal(&vc); err != nil {
+		log.Fatal().Err(err).Msg("Failed to load config")
+	}
 	cfg, err := vc.Translate()
 	if err != nil {
 		log.Fatal().Err(err).Msg("Failed to load config")
 	}
 
 	cfg.Path, _ = cmd.Flags().GetString("config")
-	source, _ := cmd.Flags().GetString("source")
-	verbose, _ := cmd.Flags().GetBool("verbose")
-	redact, _ := cmd.Flags().GetBool("redact")
 	exitCode, _ := cmd.Flags().GetInt("exit-code")
 	staged, _ := cmd.Flags().GetBool("staged")
-	if cfg.Path == "" {
-		cfg.Path = filepath.Join(source, ".gitleaks.toml")
-	}
 	start := time.Now()
 
-	files, err := git.GitDiff(source, staged)
+	// Setup detector
+	detector := detect.NewDetector(cfg)
+	detector.Config.Path, err = cmd.Flags().GetString("config")
+	if err != nil {
+		log.Fatal().Err(err)
+	}
+	source, err := cmd.Flags().GetString("source")
 	if err != nil {
-		log.Fatal().Err(err).Msg("Failed to get git log")
+		log.Fatal().Err(err)
+	}
+	// if config path is not set, then use the {source}/.gitleaks.toml path.
+	// note that there may not be a `{source}/.gitleaks.toml` file, this is ok.
+	if detector.Config.Path == "" {
+		detector.Config.Path = filepath.Join(source, ".gitleaks.toml")
+	}
+	// set verbose flag
+	if detector.Verbose, err = cmd.Flags().GetBool("verbose"); err != nil {
+		log.Fatal().Err(err)
+	}
+	// set redact flag
+	if detector.Redact, err = cmd.Flags().GetBool("redact"); err != nil {
+		log.Fatal().Err(err)
 	}
 
-	findings := detect.FromGit(files, cfg, detect.Options{Verbose: verbose, Redact: redact})
+	// get log options for git scan
+	logOpts, err := cmd.Flags().GetString("log-opts")
+	if err != nil {
+		log.Fatal().Err(err)
+	}
+
+	// start git scan
+	var findings []report.Finding
+	if staged {
+		findings, err = detector.DetectGit(source, logOpts, detect.ProtectStagedType)
+	} else {
+		findings, err = detector.DetectGit(source, logOpts, detect.ProtectType)
+	}
+	if err != nil {
+		// don't exit on error, just log it
+		log.Error().Err(err)
+	}
+
+	// log info about the scan
+	log.Info().Msgf("scan completed in %s", time.Since(start))
 	if len(findings) != 0 {
 		log.Warn().Msgf("leaks found: %d", len(findings))
 	} else {
 		log.Info().Msg("no leaks found")
 	}
 
-	log.Info().Msgf("scan duration: %s", time.Since(start))
-
 	reportPath, _ := cmd.Flags().GetString("report-path")
 	ext, _ := cmd.Flags().GetString("report-format")
 	if reportPath != "" {
-		report.Write(findings, cfg, ext, reportPath)
+		if err = report.Write(findings, cfg, ext, reportPath); err != nil {
+			log.Fatal().Err(err)
+		}
 	}
 	if len(findings) != 0 {
 		os.Exit(exitCode)
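
The protect path has the same shape; a minimal sketch of the staged/unstaged selection (the helper `protectScan` is hypothetical, `detect.ProtectType` and `detect.ProtectStagedType` come from this diff):

package example

import (
	"github.com/zricethezav/gitleaks/v8/detect"
	"github.com/zricethezav/gitleaks/v8/report"
)

// protectScan picks the git scan type from the --staged flag, matching the
// switch introduced above.
func protectScan(detector *detect.Detector, source, logOpts string, staged bool) ([]report.Finding, error) {
	scanType := detect.ProtectType
	if staged {
		// only scan changes that have been staged with `git add`
		scanType = detect.ProtectStagedType
	}
	return detector.DetectGit(source, logOpts, scanType)
}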

+ 11 - 4
cmd/root.go

@@ -45,7 +45,10 @@ func init() {
 	rootCmd.PersistentFlags().StringP("log-level", "l", "info", "log level (debug, info, warn, error, fatal)")
 	rootCmd.PersistentFlags().BoolP("verbose", "v", false, "show verbose output from scan")
 	rootCmd.PersistentFlags().Bool("redact", false, "redact secrets from logs and stdout")
-	viper.BindPFlag("config", rootCmd.PersistentFlags().Lookup("config"))
+	err := viper.BindPFlag("config", rootCmd.PersistentFlags().Lookup("config"))
+	if err != nil {
+		log.Fatal().Msgf("err binding config %s", err.Error())
+	}
 }
 
 func initLog() {
@@ -71,7 +74,7 @@ func initLog() {
 }
 
 func initConfig() {
-	fmt.Fprintf(os.Stderr, banner)
+	fmt.Fprint(os.Stderr, banner)
 	cfgPath, err := rootCmd.Flags().GetString("config")
 	if err != nil {
 		log.Fatal().Msg(err.Error())
@@ -97,14 +100,18 @@ func initConfig() {
 			log.Debug().Msgf("Unable to load gitleaks config from %s since --source=%s is a file, using default config",
 				filepath.Join(source, ".gitleaks.toml"), source)
 			viper.SetConfigType("toml")
-			viper.ReadConfig(strings.NewReader(config.DefaultConfig))
+			if err = viper.ReadConfig(strings.NewReader(config.DefaultConfig)); err != nil {
+				log.Fatal().Msgf("err reading toml %s", err.Error())
+			}
 			return
 		}
 
 		if _, err := os.Stat(filepath.Join(source, ".gitleaks.toml")); os.IsNotExist(err) {
 			log.Debug().Msgf("No gitleaks config found in path %s, using default gitleaks config", filepath.Join(source, ".gitleaks.toml"))
 			viper.SetConfigType("toml")
-			viper.ReadConfig(strings.NewReader(config.DefaultConfig))
+			if err = viper.ReadConfig(strings.NewReader(config.DefaultConfig)); err != nil {
+				log.Fatal().Msgf("err reading default config toml %s", err.Error())
+			}
 			return
 		} else {
 			log.Debug().Msgf("Using existing gitleaks config %s from `(--source)/.gitleaks.toml`", filepath.Join(source, ".gitleaks.toml"))

+ 17 - 11
config/allowlist.go

@@ -2,13 +2,23 @@ package config
 
 import "regexp"
 
+// Allowlist allows a rule to be ignored for specific
+// regexes, paths, and/or commits
 type Allowlist struct {
+	// Short human readable description of the allowlist.
 	Description string
-	Regexes     []*regexp.Regexp
-	Paths       []*regexp.Regexp
-	Commits     []string
+
+	// Regexes is a slice of content regular expressions that are allowed to be ignored.
+	Regexes []*regexp.Regexp
+
+	// Paths is a slice of path regular expressions that are allowed to be ignored.
+	Paths []*regexp.Regexp
+
+	// Commits is a slice of commit SHAs that are allowed to be ignored.
+	Commits []string
 }
 
+// CommitAllowed returns true if the commit is allowed to be ignored.
 func (a *Allowlist) CommitAllowed(c string) bool {
 	if c == "" {
 		return false
@@ -21,16 +31,12 @@ func (a *Allowlist) CommitAllowed(c string) bool {
 	return false
 }
 
+// PathAllowed returns true if the path is allowed to be ignored.
 func (a *Allowlist) PathAllowed(path string) bool {
-	if anyRegexMatch(path, a.Paths) {
-		return true
-	}
-	return false
+	return anyRegexMatch(path, a.Paths)
 }
 
+// RegexAllowed returns true if the regex is allowed to be ignored.
 func (a *Allowlist) RegexAllowed(s string) bool {
-	if anyRegexMatch(s, a.Regexes) {
-		return true
-	}
-	return false
+	return anyRegexMatch(s, a.Regexes)
 }
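
A small usage sketch of the documented methods (the `example` package is illustrative; the `allowthiscommit` SHA mirrors the test fixtures elsewhere in this PR):

package example

import (
	"regexp"

	"github.com/zricethezav/gitleaks/v8/config"
)

func allowlistExample() {
	a := config.Allowlist{
		Description: "ignore test fixtures and a known commit",
		Regexes:     []*regexp.Regexp{regexp.MustCompile(`EXAMPLE_KEY`)},
		Paths:       []*regexp.Regexp{regexp.MustCompile(`testdata/`)},
		Commits:     []string{"allowthiscommit"},
	}

	_ = a.PathAllowed("testdata/config/simple.toml") // true: a path regex matches
	_ = a.RegexAllowed("EXAMPLE_KEY=abc123")         // true: a content regex matches
	_ = a.CommitAllowed("deadbeef")                  // false: commit not listed
	_ = a.CommitAllowed("")                          // false: empty commits are never allowed
}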

+ 4 - 51
config/config_test.go

@@ -103,7 +103,10 @@ func TestTranslate(t *testing.T) {
 		}
 
 		var vc ViperConfig
-		viper.Unmarshal(&vc)
+		err = viper.Unmarshal(&vc)
+		if err != nil {
+			t.Error(err)
+		}
 		cfg, err := vc.Translate()
 		if tt.wantError != nil {
 			if err == nil {
@@ -115,53 +118,3 @@ func TestTranslate(t *testing.T) {
 		assert.Equal(t, cfg.Rules, tt.cfg.Rules)
 	}
 }
-
-func TestIncludeEntropy(t *testing.T) {
-	tests := []struct {
-		rule    Rule
-		secret  string
-		entropy float32
-		include bool
-	}{
-		{
-			rule: Rule{
-				RuleID:      "generic-api-key",
-				SecretGroup: 4,
-				Entropy:     3.5,
-				Regex:       regexp.MustCompile(`(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]`),
-			},
-			secret:  `e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5`,
-			entropy: 3.7906235872459746,
-			include: true,
-		},
-		{
-			rule: Rule{
-				RuleID:      "generic-api-key",
-				SecretGroup: 4,
-				Entropy:     4,
-				Regex:       regexp.MustCompile(`(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]`),
-			},
-			secret:  `e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5`,
-			entropy: 3.7906235872459746,
-			include: false,
-		},
-		{
-			rule: Rule{
-				RuleID:      "generic-api-key",
-				SecretGroup: 4,
-				Entropy:     3.0,
-				Regex:       regexp.MustCompile(`(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]`),
-			},
-			secret:  `ssh-keyboard-interactive`,
-			entropy: 0,
-			include: false,
-		},
-	}
-
-	for _, tt := range tests {
-		include, entropy := tt.rule.IncludeEntropy(tt.secret)
-		assert.Equal(t, true, tt.rule.EntropySet())
-		assert.Equal(t, tt.entropy, float32(entropy))
-		assert.Equal(t, tt.include, include)
-	}
-}

+ 26 - 33
config/rule.go

@@ -2,44 +2,37 @@ package config
 
 import (
 	"regexp"
-	"strings"
 )
 
+// Rule contains the information that defines how to detect secrets
 type Rule struct {
+	// Description is the description of the rule.
 	Description string
-	RuleID      string
-	Entropy     float64
+
+	// RuleID is a unique identifier for this rule
+	RuleID string
+
+	// Entropy is a float representing the minimum shannon
+	// entropy a regex group must have to be considered a secret.
+	Entropy float64
+
+	// SecretGroup is an int used to extract secret from regex
+	// match and used as the group that will have its entropy
+	// checked if `entropy` is set.
 	SecretGroup int
-	Regex       *regexp.Regexp
-	Path        *regexp.Regexp
-	Tags        []string
-	Allowlist   Allowlist
-}
 
-func (r *Rule) IncludeEntropy(secret string) (bool, float64) {
-	// NOTE: this is a goofy hack to get around the fact there golang's regex engine
-	// does not support positive lookaheads. Ideally we would want to add a
-	// restriction on generic rules regex that requires the secret match group
-	// contains both numbers and alphabetical characters. What this bit of code does is
-	// check if the ruleid is prepended with "generic" and enforces the
-	// secret contains both digits and alphabetical characters.
-	if strings.HasPrefix(r.RuleID, "generic") {
-		if !containsDigit(secret) {
-			return false, 0.0
-		}
-	}
-	// group = 0 will check the entropy of the whole regex match
-	e := shannonEntropy(secret)
-	if e > r.Entropy {
-		return true, e
-	}
-
-	return false, e
-}
+	// Regex is a golang regular expression used to detect secrets.
+	Regex *regexp.Regexp
+
+	// Path is a golang regular expression used to
+	// filter secrets by path
+	Path *regexp.Regexp
+
+	// Tags is an array of strings used for metadata
+	// and reporting purposes.
+	Tags []string
 
-func (r *Rule) EntropySet() bool {
-	if r.Entropy == 0.0 {
-		return false
-	}
-	return true
+	// Allowlist allows a rule to be ignored for specific
+	// regexes, paths, and/or commits
+	Allowlist Allowlist
 }
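
For reference, a rule built from these fields might look like the sketch below; the regex, thresholds, and IDs are invented for illustration and are not one of gitleaks' built-in rules.

package example

import (
	"regexp"

	"github.com/zricethezav/gitleaks/v8/config"
)

// exampleRule shows each documented Rule field in use.
var exampleRule = config.Rule{
	RuleID:      "example-generic-token",
	Description: "Example generic token",
	Regex:       regexp.MustCompile(`token['"]?\s*[:=]\s*['"]([0-9a-zA-Z\-_=]{16,64})['"]`),
	SecretGroup: 1,   // capture group holding the secret
	Entropy:     3.5, // minimum shannon entropy for that group
	Path:        regexp.MustCompile(`\.go$`),
	Tags:        []string{"example"},
	Allowlist: config.Allowlist{
		Paths: []*regexp.Regexp{regexp.MustCompile(`_test\.go$`)},
	},
}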

+ 0 - 36
config/utils.go

@@ -1,7 +1,6 @@
 package config
 
 import (
-	"math"
 	"regexp"
 )
 
@@ -23,38 +22,3 @@ func regexMatched(f string, re *regexp.Regexp) bool {
 	}
 	return false
 }
-
-func containsDigit(s string) bool {
-	for _, c := range s {
-		switch c {
-		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
-			return true
-		}
-
-	}
-	return false
-}
-
-// shannonEntropy calculates the entropy of data using the formula defined here:
-// https://en.wiktionary.org/wiki/Shannon_entropy
-// Another way to think about what this is doing is calculating the number of bits
-// needed to on average encode the data. So, the higher the entropy, the more random the data, the
-// more bits needed to encode that data.
-func shannonEntropy(data string) (entropy float64) {
-	if data == "" {
-		return 0
-	}
-
-	charCounts := make(map[rune]int)
-	for _, char := range data {
-		charCounts[char]++
-	}
-
-	invLength := 1.0 / float64(len(data))
-	for _, count := range charCounts {
-		freq := float64(count) * invLength
-		entropy -= freq * math.Log2(freq)
-	}
-
-	return entropy
-}
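
containsDigit and shannonEntropy move into detect/utils.go (see that file below). As a reference for the new Entropy field on findings, here is the same calculation as a standalone snippet; the package and main are illustrative, and the sample secret comes from the updated tests, which expect an entropy of about 3.084 for it.

package main

import (
	"fmt"
	"math"
)

// shannonEntropy is a copy of the moved function: bits needed, on average,
// to encode each character of the input.
func shannonEntropy(data string) (entropy float64) {
	if data == "" {
		return 0
	}
	charCounts := make(map[rune]int)
	for _, char := range data {
		charCounts[char]++
	}
	invLength := 1.0 / float64(len(data))
	for _, count := range charCounts {
		freq := float64(count) * invLength
		entropy -= freq * math.Log2(freq)
	}
	return entropy
}

func main() {
	// The detect tests in this change expect 3.0841837 for this sample key.
	fmt.Println(shannonEntropy("AKIALALEMEL33243OLIA"))
}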

+ 325 - 97
detect/detect.go

@@ -1,144 +1,372 @@
 package detect
 
 import (
-	"encoding/json"
+	"context"
 	"fmt"
+	"os"
+	"path/filepath"
 	"regexp"
 	"strings"
-
-	"github.com/rs/zerolog/log"
+	"sync"
 
 	"github.com/zricethezav/gitleaks/v8/config"
+	"github.com/zricethezav/gitleaks/v8/detect/git"
 	"github.com/zricethezav/gitleaks/v8/report"
+
+	"github.com/fatih/semgroup"
+	"github.com/gitleaks/go-gitdiff/gitdiff"
+	"github.com/rs/zerolog/log"
+	"github.com/spf13/viper"
 )
 
-type Options struct {
+// Type used to differentiate between git scan types:
+// $ gitleaks detect
+// $ gitleaks protect
+// $ gitleaks protect staged
+type GitScanType int
+
+const (
+	DetectType GitScanType = iota
+	ProtectType
+	ProtectStagedType
+)
+
+// Detector is the main detector struct
+type Detector struct {
+	// Config is the configuration for the detector
+	Config config.Config
+
+	// Redact is a flag to redact findings. This is exported
+	// so users using gitleaks as a library can set this flag
+	// without calling `detector.Start(cmd *cobra.Command)`
+	Redact bool
+
+	// verbose is a flag to print findings
 	Verbose bool
-	Redact  bool
+
+	// commitMap is used to keep track of commits that have been scanned.
+	// This is only used for logging purposes and git scans.
+	commitMap map[string]bool
+
+	// findingMutex is to prevent concurrent access to the
+	// findings slice when adding findings.
+	findingMutex *sync.Mutex
+
+	// findings is a slice of report.Findings. This is the result
+	// of the detector's scan which can then be used to generate a
+	// report.
+	findings []report.Finding
 }
 
-const MAXGOROUTINES = 4
+// Fragment contains the data to be scanned
+type Fragment struct {
+	// Raw is the raw content of the fragment
+	Raw string
+
+	// FilePath is the path to the file if applicable
+	FilePath string
+
+	// CommitSHA is the SHA of the commit if applicable
+	CommitSHA string
 
-func DetectFindings(cfg config.Config, b []byte, filePath string, commit string) []report.Finding {
+	// newlineIndices is a list of indices of newlines in the raw content.
+	// This is used to calculate the line location of a finding
+	newlineIndices [][]int
+}
+
+// NewDetector creates a new detector with the given config
+func NewDetector(cfg config.Config) *Detector {
+	return &Detector{
+		commitMap:    make(map[string]bool),
+		findingMutex: &sync.Mutex{},
+		findings:     make([]report.Finding, 0),
+		Config:       cfg,
+	}
+}
+
+// NewDetectorDefaultConfig creates a new detector with the default config
+func NewDetectorDefaultConfig() (*Detector, error) {
+	viper.SetConfigType("toml")
+	err := viper.ReadConfig(strings.NewReader(config.DefaultConfig))
+	if err != nil {
+		return nil, err
+	}
+	var vc config.ViperConfig
+	err = viper.Unmarshal(&vc)
+	if err != nil {
+		return nil, err
+	}
+	cfg, err := vc.Translate()
+	if err != nil {
+		return nil, err
+	}
+	return NewDetector(cfg), nil
+}
+
+// DetectBytes scans the given bytes and returns a list of findings
+func (d *Detector) DetectBytes(content []byte) []report.Finding {
+	return d.DetectString(string(content))
+}
+
+// DetectString scans the given string and returns a list of findings
+func (d *Detector) DetectString(content string) []report.Finding {
+	return d.Detect(Fragment{
+		Raw: content,
+	})
+}
+
+// detectRule scans the given fragment for the given rule and returns a list of findings
+func (d *Detector) detectRule(fragment Fragment, rule *config.Rule) []report.Finding {
 	var findings []report.Finding
-	linePairs := regexp.MustCompile("\n").FindAllIndex(b, -1)
 
-	// check if we should skip file based on the global allowlist or if the file is the same as the gitleaks config
-	if cfg.Allowlist.PathAllowed(filePath) || filePath == cfg.Path {
+	// check if filepath or commit is allowed for this rule
+	if rule.Allowlist.CommitAllowed(fragment.CommitSHA) ||
+		rule.Allowlist.PathAllowed(fragment.FilePath) {
 		return findings
 	}
 
-	for _, r := range cfg.Rules {
-		pathSkip := false
-		if r.Allowlist.CommitAllowed(commit) {
-			continue
+	if rule.Path != nil && rule.Regex == nil {
+		// Path _only_ rule
+		if rule.Path.Match([]byte(fragment.FilePath)) {
+			finding := report.Finding{
+				Description: rule.Description,
+				File:        fragment.FilePath,
+				RuleID:      rule.RuleID,
+				Match:       fmt.Sprintf("file detected: %s", fragment.FilePath),
+				Tags:        rule.Tags,
+			}
+			return append(findings, finding)
+		}
+	} else if rule.Path != nil {
+		// if path is set _and_ a regex is set, then we need to check both
+		// so if the path does not match, then we should return early and not
+		// consider the regex
+		if !rule.Path.Match([]byte(fragment.FilePath)) {
+			return findings
 		}
-		if r.Allowlist.PathAllowed(filePath) {
+	}
+
+	matchIndices := rule.Regex.FindAllStringIndex(fragment.Raw, -1)
+	for _, matchIndex := range matchIndices {
+		// extract secret from match
+		secret := strings.Trim(fragment.Raw[matchIndex[0]:matchIndex[1]], "\n")
+
+		// determine location of match. Note that the location
+		// in the finding will be the line/column numbers of the _match_
+		// not the _secret_, which will be different if the secretGroup
+		// value is set for this rule
+		loc := location(fragment, matchIndex)
+
+		finding := report.Finding{
+			Description: rule.Description,
+			File:        fragment.FilePath,
+			RuleID:      rule.RuleID,
+			StartLine:   loc.startLine,
+			EndLine:     loc.endLine,
+			StartColumn: loc.startColumn,
+			EndColumn:   loc.endColumn,
+			Secret:      secret,
+			Match:       secret,
+			Tags:        rule.Tags,
+		}
+
+		// check if the secret is in the allowlist
+		if rule.Allowlist.RegexAllowed(finding.Secret) ||
+			d.Config.Allowlist.RegexAllowed(finding.Secret) {
 			continue
 		}
 
-		// Check if path should be considered
-		if r.Path != nil {
-			if r.Path.Match([]byte(filePath)) {
-				if r.Regex == nil {
-					// This is a path only rule
-					f := report.Finding{
-						Description: r.Description,
-						File:        filePath,
-						RuleID:      r.RuleID,
-						Match:       fmt.Sprintf("file detected: %s", filePath),
-						Tags:        r.Tags,
-					}
-					findings = append(findings, f)
-					pathSkip = true
+		// extract secret from secret group if set
+		if rule.SecretGroup != 0 {
+			groups := rule.Regex.FindStringSubmatch(secret)
+			if len(groups) <= rule.SecretGroup || len(groups) == 0 {
+				// Config validation should prevent this
+				continue
+			}
+			secret = groups[rule.SecretGroup]
+			finding.Secret = secret
+		}
+
+		// check entropy
+		entropy := shannonEntropy(finding.Secret)
+		finding.Entropy = float32(entropy)
+		if rule.Entropy != 0.0 {
+			if entropy <= rule.Entropy {
+				// entropy is too low, skip this finding
+				continue
+			}
+			// NOTE: this is a goofy hack to get around the fact that golang's regex engine
+			// does not support positive lookaheads. Ideally we would want to add a
+			// restriction on generic rules regex that requires the secret match group
+			// contains both numbers and alphabetical characters, not just alphabetical characters.
+			// What this bit of code does is check if the rule ID is prefixed with "generic" and enforce that
+			// the secret contains both digits and alphabetical characters.
+			// TODO: this should be replaced with stop words
+			if strings.HasPrefix(rule.RuleID, "generic") {
+				if !containsDigit(secret) {
+					continue
 				}
-			} else {
-				pathSkip = true
 			}
 		}
-		if pathSkip {
-			continue
+
+		findings = append(findings, finding)
+	}
+	return findings
+}
+
+// DetectGit accepts a source, git log options, and a scan type. It generates a *gitdiff.File
+// channel from the output of `git log -p ...` (or `git diff`), then checks each file (patch)
+// in the history and determines if the patch contains any findings.
+func (d *Detector) DetectGit(source string, logOpts string, gitScanType GitScanType) ([]report.Finding, error) {
+	var (
+		gitdiffFiles <-chan *gitdiff.File
+		err          error
+	)
+	switch gitScanType {
+	case DetectType:
+		gitdiffFiles, err = git.GitLog(source, logOpts)
+		if err != nil {
+			return d.findings, err
+		}
+	case ProtectType:
+		gitdiffFiles, err = git.GitDiff(source, false)
+		if err != nil {
+			return d.findings, err
 		}
+	case ProtectStagedType:
+		gitdiffFiles, err = git.GitDiff(source, true)
+		if err != nil {
+			return d.findings, err
+		}
+	}
 
-		matchIndices := r.Regex.FindAllIndex(b, -1)
-		for _, m := range matchIndices {
-			location := getLocation(linePairs, m[0], m[1])
-			secret := strings.Trim(string(b[m[0]:m[1]]), "\n")
-			f := report.Finding{
-				Description: r.Description,
-				File:        filePath,
-				RuleID:      r.RuleID,
-				StartLine:   location.startLine,
-				EndLine:     location.endLine,
-				StartColumn: location.startColumn,
-				EndColumn:   location.endColumn,
-				Secret:      secret,
-				Match:       secret,
-				Tags:        r.Tags,
-			}
+	s := semgroup.NewGroup(context.Background(), 4)
 
-			if r.Allowlist.RegexAllowed(f.Secret) || cfg.Allowlist.RegexAllowed(f.Secret) {
+	for gitdiffFile := range gitdiffFiles {
+		gitdiffFile := gitdiffFile
+
+		// skip binary files
+		if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
+			continue
+		}
+
+		// Check if commit is allowed
+		commitSHA := ""
+		if gitdiffFile.PatchHeader != nil {
+			commitSHA = gitdiffFile.PatchHeader.SHA
+			if d.Config.Allowlist.CommitAllowed(gitdiffFile.PatchHeader.SHA) {
 				continue
 			}
+		}
+		d.addCommit(commitSHA)
+
+		s.Go(func() error {
+			for _, textFragment := range gitdiffFile.TextFragments {
+				if textFragment == nil {
+					return nil
+				}
 
-			// extract secret from secret group if set
-			if r.SecretGroup != 0 {
-				groups := r.Regex.FindStringSubmatch(secret)
-				if len(groups) <= r.SecretGroup || len(groups) == 0 {
-					// Config validation should prevent this
-					break
+				fragment := Fragment{
+					Raw:       textFragment.Raw(gitdiff.OpAdd),
+					CommitSHA: commitSHA,
+					FilePath:  gitdiffFile.NewName,
+				}
+
+				for _, finding := range d.Detect(fragment) {
+					d.addFinding(augmentGitFinding(finding, textFragment, gitdiffFile))
 				}
-				secret = groups[r.SecretGroup]
-				f.Secret = secret
 			}
+			return nil
+		})
+	}
+
+	if err := s.Wait(); err != nil {
+		return d.findings, err
+	}
+	log.Debug().Msgf("%d commits scanned. Note: this number might be smaller than expected due to commits with no additions", len(d.commitMap))
+	return d.findings, nil
+}
 
-			// extract secret from secret group if set
-			if r.EntropySet() {
-				include, entropy := r.IncludeEntropy(secret)
-				if include {
-					f.Entropy = float32(entropy)
-					findings = append(findings, f)
+// DetectFiles accepts a path to a source directory or file and begins a scan of the
+// file or directory.
+func (d *Detector) DetectFiles(source string) ([]report.Finding, error) {
+	s := semgroup.NewGroup(context.Background(), 4)
+	paths := make(chan string)
+	s.Go(func() error {
+		defer close(paths)
+		return filepath.Walk(source,
+			func(path string, fInfo os.FileInfo, err error) error {
+				if err != nil {
+					return err
+				}
+				if fInfo.Name() == ".git" {
+					return filepath.SkipDir
+				}
+				if fInfo.Mode().IsRegular() {
+					paths <- path
 				}
-			} else {
-				findings = append(findings, f)
+				return nil
+			})
+	})
+	for pa := range paths {
+		p := pa
+		s.Go(func() error {
+			b, err := os.ReadFile(p)
+			if err != nil {
+				return err
 			}
-		}
+			fragment := Fragment{
+				Raw:      string(b),
+				FilePath: p,
+			}
+			for _, finding := range d.Detect(fragment) {
+				// need to add 1 since line counting starts at 1
+				finding.EndLine++
+				finding.StartLine++
+				d.addFinding(finding)
+			}
+
+			return nil
+		})
+	}
+
+	if err := s.Wait(); err != nil {
+		return d.findings, err
 	}
 
-	return dedupe(findings)
+	return d.findings, nil
 }
 
-func printFinding(f report.Finding) {
-	var b []byte
-	b, _ = json.MarshalIndent(f, "", "	")
-	fmt.Println(string(b))
+// Detect scans the given fragment and returns a list of findings
+func (d *Detector) Detect(fragment Fragment) []report.Finding {
+	var findings []report.Finding
+
+	// check if filepath is allowed
+	if d.Config.Allowlist.PathAllowed(fragment.FilePath) ||
+		fragment.FilePath == d.Config.Path {
+		return findings
+	}
+
+	// add newline indices for location calculation in detectRule
+	fragment.newlineIndices = regexp.MustCompile("\n").FindAllStringIndex(fragment.Raw, -1)
+
+	for _, rule := range d.Config.Rules {
+		findings = append(findings, d.detectRule(fragment, rule)...)
+	}
+	return filter(findings, d.Redact)
 }
 
-func dedupe(findings []report.Finding) []report.Finding {
-	var retFindings []report.Finding
-	for _, f := range findings {
-		include := true
-		if strings.Contains(strings.ToLower(f.RuleID), "generic") {
-			for _, fPrime := range findings {
-				if f.StartLine == fPrime.StartLine &&
-					f.EndLine == fPrime.EndLine &&
-					f.Commit == fPrime.Commit &&
-					f.RuleID != fPrime.RuleID &&
-					strings.Contains(fPrime.Secret, f.Secret) &&
-					!strings.Contains(strings.ToLower(fPrime.RuleID), "generic") {
-
-					genericMatch := strings.Replace(f.Match, f.Secret, "REDACTED", -1)
-					betterMatch := strings.Replace(fPrime.Match, fPrime.Secret, "REDACTED", -1)
-					log.Debug().Msgf("skipping %s finding (%s), %s rule takes precendence (%s)", f.RuleID, genericMatch, fPrime.RuleID, betterMatch)
-					include = false
-					break
-				}
-			}
-		}
-		if include {
-			retFindings = append(retFindings, f)
-		}
+// addFinding synchronously adds a finding to the findings slice
+func (d *Detector) addFinding(finding report.Finding) {
+	d.findingMutex.Lock()
+	d.findings = append(d.findings, finding)
+	if d.Verbose {
+		printFinding(finding)
 	}
+	d.findingMutex.Unlock()
+}
 
-	return retFindings
+// addCommit synchronously adds a commit to the commit slice
+func (d *Detector) addCommit(commit string) {
+	d.commitMap[commit] = true
 }
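
Putting the new API together, a minimal library quick-start might look like this. A sketch only: which findings come back depends on the embedded default ruleset, and output handling is up to the caller; the point is that every finding now carries an Entropy value, not just those from entropy-gated rules.

package main

import (
	"fmt"

	"github.com/zricethezav/gitleaks/v8/detect"
)

func main() {
	// NewDetectorDefaultConfig loads the embedded default ruleset.
	detector, err := detect.NewDetectorDefaultConfig()
	if err != nil {
		panic(err)
	}

	findings := detector.DetectString(`awsToken := "AKIALALEMEL33243OLIA"`)
	for _, f := range findings {
		fmt.Printf("%s line %d entropy %.2f\n", f.RuleID, f.StartLine, f.Entropy)
	}
}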

+ 297 - 44
detect/detect_test.go

@@ -2,6 +2,7 @@ package detect
 
 import (
 	"fmt"
+	"os"
 	"path/filepath"
 	"testing"
 
@@ -12,20 +13,22 @@ import (
 	"github.com/zricethezav/gitleaks/v8/report"
 )
 
-func TestDetectFindings(t *testing.T) {
+const configPath = "../testdata/config/"
+const repoBasePath = "../testdata/repos/"
+
+func TestDetect(t *testing.T) {
 	tests := []struct {
 		cfgName          string
-		opts             Options
-		filePath         string
-		bytes            []byte
-		commit           string
+		fragment         Fragment
 		expectedFindings []report.Finding
 		wantError        error
 	}{
 		{
-			cfgName:  "escaped_character_group",
-			bytes:    []byte(`pypi-AgEIcHlwaS5vcmcAAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAAB`),
-			filePath: "tmp.go",
+			cfgName: "escaped_character_group",
+			fragment: Fragment{
+				Raw:      `pypi-AgEIcHlwaS5vcmcAAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAAB`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{
 				{
 					Description: "PyPI upload token",
@@ -38,13 +41,16 @@ func TestDetectFindings(t *testing.T) {
 					EndLine:     1,
 					StartColumn: 1,
 					EndColumn:   86,
+					Entropy:     1.9606875,
 				},
 			},
 		},
 		{
-			cfgName:  "simple",
-			bytes:    []byte(`awsToken := \"AKIALALEMEL33243OLIA\"`),
-			filePath: "tmp.go",
+			cfgName: "simple",
+			fragment: Fragment{
+				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{
 				{
 					Description: "AWS Access Key",
@@ -57,32 +63,41 @@ func TestDetectFindings(t *testing.T) {
 					EndLine:     1,
 					StartColumn: 15,
 					EndColumn:   34,
+					Entropy:     3.0841837,
 				},
 			},
 		},
 		{
-			cfgName:          "allow_aws_re",
-			bytes:            []byte(`awsToken := \"AKIALALEMEL33243OLIA\"`),
-			filePath:         "tmp.go",
+			cfgName: "allow_aws_re",
+			fragment: Fragment{
+				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{},
 		},
 		{
-			cfgName:          "allow_path",
-			bytes:            []byte(`awsToken := \"AKIALALEMEL33243OLIA\"`),
-			filePath:         "tmp.go",
+			cfgName: "allow_path",
+			fragment: Fragment{
+				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{},
 		},
 		{
-			cfgName:          "allow_commit",
-			bytes:            []byte(`awsToken := \"AKIALALEMEL33243OLIA\"`),
-			filePath:         "tmp.go",
+			cfgName: "allow_commit",
+			fragment: Fragment{
+				Raw:       `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath:  "tmp.go",
+				CommitSHA: "allowthiscommit",
+			},
 			expectedFindings: []report.Finding{},
-			commit:           "allowthiscommit",
 		},
 		{
-			cfgName:  "entropy_group",
-			bytes:    []byte(`const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`),
-			filePath: "tmp.go",
+			cfgName: "entropy_group",
+			fragment: Fragment{
+				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{
 				{
 					Description: "Discord API key",
@@ -100,15 +115,19 @@ func TestDetectFindings(t *testing.T) {
 			},
 		},
 		{
-			cfgName:          "generic_with_py_path",
-			bytes:            []byte(`const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`),
-			filePath:         "tmp.go",
+			cfgName: "generic_with_py_path",
+			fragment: Fragment{
+				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{},
 		},
 		{
-			cfgName:  "generic_with_py_path",
-			bytes:    []byte(`const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`),
-			filePath: "tmp.py",
+			cfgName: "generic_with_py_path",
+			fragment: Fragment{
+				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+				FilePath: "tmp.py",
+			},
 			expectedFindings: []report.Finding{
 				{
 					Description: "Generic API Key",
@@ -126,9 +145,11 @@ func TestDetectFindings(t *testing.T) {
 			},
 		},
 		{
-			cfgName:  "path_only",
-			bytes:    []byte(`const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`),
-			filePath: "tmp.py",
+			cfgName: "path_only",
+			fragment: Fragment{
+				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+				FilePath: "tmp.py",
+			},
 			expectedFindings: []report.Finding{
 				{
 					Description: "Python Files",
@@ -140,22 +161,28 @@ func TestDetectFindings(t *testing.T) {
 			},
 		},
 		{
-			cfgName:          "bad_entropy_group",
-			bytes:            []byte(`const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`),
-			filePath:         "tmp.go",
+			cfgName: "bad_entropy_group",
+			fragment: Fragment{
+				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{},
 			wantError:        fmt.Errorf("Discord API key invalid regex secret group 5, max regex secret group 3"),
 		},
 		{
-			cfgName:          "simple",
-			bytes:            []byte(`awsToken := \"AKIALALEMEL33243OLIA\"`),
-			filePath:         filepath.Join(configPath, "simple.toml"),
+			cfgName: "simple",
+			fragment: Fragment{
+				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath: filepath.Join(configPath, "simple.toml"),
+			},
 			expectedFindings: []report.Finding{},
 		},
 		{
-			cfgName:          "allow_global_aws_re",
-			bytes:            []byte(`awsToken := \"AKIALALEMEL33243OLIA\"`),
-			filePath:         "tmp.go",
+			cfgName: "allow_global_aws_re",
+			fragment: Fragment{
+				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{},
 		},
 	}
@@ -171,7 +198,10 @@ func TestDetectFindings(t *testing.T) {
 		}
 
 		var vc config.ViperConfig
-		viper.Unmarshal(&vc)
+		err = viper.Unmarshal(&vc)
+		if err != nil {
+			t.Error(err)
+		}
 		cfg, err := vc.Translate()
 		cfg.Path = filepath.Join(configPath, tt.cfgName+".toml")
 		if tt.wantError != nil {
@@ -180,8 +210,231 @@ func TestDetectFindings(t *testing.T) {
 			}
 			assert.Equal(t, tt.wantError, err)
 		}
+		d := NewDetector(cfg)
 
-		findings := DetectFindings(cfg, tt.bytes, tt.filePath, tt.commit)
+		findings := d.Detect(tt.fragment)
 		assert.ElementsMatch(t, tt.expectedFindings, findings)
 	}
 }
+
+// TestFromGit tests the DetectGit function
+func TestFromGit(t *testing.T) {
+	tests := []struct {
+		cfgName          string
+		source           string
+		logOpts          string
+		expectedFindings []report.Finding
+	}{
+		{
+			source:  filepath.Join(repoBasePath, "small"),
+			cfgName: "simple",
+			expectedFindings: []report.Finding{
+				{
+					Description: "AWS Access Key",
+					StartLine:   20,
+					EndLine:     20,
+					StartColumn: 19,
+					EndColumn:   38,
+					Secret:      "AKIALALEMEL33243OLIA",
+					Match:       "AKIALALEMEL33243OLIA",
+					File:        "main.go",
+					Date:        "2021-11-02T23:37:53Z",
+					Commit:      "1b6da43b82b22e4eaa10bcf8ee591e91abbfc587",
+					Author:      "Zachary Rice",
+					Email:       "zricer@protonmail.com",
+					Message:     "Accidentally add a secret",
+					RuleID:      "aws-access-key",
+					Tags:        []string{"key", "AWS"},
+					Entropy:     3.0841837,
+				},
+				{
+					Description: "AWS Access Key",
+					StartLine:   9,
+					EndLine:     9,
+					StartColumn: 17,
+					EndColumn:   36,
+					Secret:      "AKIALALEMEL33243OLIA",
+					Match:       "AKIALALEMEL33243OLIA",
+					File:        "foo/foo.go",
+					Date:        "2021-11-02T23:48:06Z",
+					Commit:      "491504d5a31946ce75e22554cc34203d8e5ff3ca",
+					Author:      "Zach Rice",
+					Email:       "zricer@protonmail.com",
+					Message:     "adding foo package with secret",
+					RuleID:      "aws-access-key",
+					Tags:        []string{"key", "AWS"},
+					Entropy:     3.0841837,
+				},
+			},
+		},
+		{
+			source:  filepath.Join(repoBasePath, "small"),
+			logOpts: "--all foo...",
+			cfgName: "simple",
+			expectedFindings: []report.Finding{
+				{
+					Description: "AWS Access Key",
+					StartLine:   9,
+					EndLine:     9,
+					StartColumn: 17,
+					EndColumn:   36,
+					Secret:      "AKIALALEMEL33243OLIA",
+					Match:       "AKIALALEMEL33243OLIA",
+					Date:        "2021-11-02T23:48:06Z",
+					File:        "foo/foo.go",
+					Commit:      "491504d5a31946ce75e22554cc34203d8e5ff3ca",
+					Author:      "Zach Rice",
+					Email:       "zricer@protonmail.com",
+					Message:     "adding foo package with secret",
+					RuleID:      "aws-access-key",
+					Tags:        []string{"key", "AWS"},
+					Entropy:     3.0841837,
+				},
+			},
+		},
+	}
+
+	err := moveDotGit("dotGit", ".git")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer func() {
+		if err := moveDotGit(".git", "dotGit"); err != nil {
+			t.Error(err)
+		}
+	}()
+
+	for _, tt := range tests {
+
+		viper.AddConfigPath(configPath)
+		viper.SetConfigName("simple")
+		viper.SetConfigType("toml")
+		err = viper.ReadInConfig()
+		if err != nil {
+			t.Error(err)
+		}
+
+		var vc config.ViperConfig
+		err = viper.Unmarshal(&vc)
+		if err != nil {
+			t.Error(err)
+		}
+		cfg, err := vc.Translate()
+		if err != nil {
+			t.Error(err)
+		}
+		detector := NewDetector(cfg)
+		findings, err := detector.DetectGit(tt.source, tt.logOpts, DetectType)
+		if err != nil {
+			t.Error(err)
+		}
+
+		for _, f := range findings {
+			f.Match = "" // remove lines cause copying and pasting them has some wack formatting
+		}
+		assert.ElementsMatch(t, tt.expectedFindings, findings)
+	}
+}
+
+// TestFromFiles tests the DetectFiles function
+func TestFromFiles(t *testing.T) {
+	tests := []struct {
+		cfgName          string
+		source           string
+		expectedFindings []report.Finding
+	}{
+		{
+			source:  filepath.Join(repoBasePath, "nogit"),
+			cfgName: "simple",
+			expectedFindings: []report.Finding{
+				{
+					Description: "AWS Access Key",
+					StartLine:   20,
+					EndLine:     20,
+					StartColumn: 16,
+					EndColumn:   35,
+					Match:       "AKIALALEMEL33243OLIA",
+					Secret:      "AKIALALEMEL33243OLIA",
+					File:        "../testdata/repos/nogit/main.go",
+					RuleID:      "aws-access-key",
+					Tags:        []string{"key", "AWS"},
+					Entropy:     3.0841837,
+				},
+			},
+		},
+		{
+			source:  filepath.Join(repoBasePath, "nogit", "main.go"),
+			cfgName: "simple",
+			expectedFindings: []report.Finding{
+				{
+					Description: "AWS Access Key",
+					StartLine:   20,
+					EndLine:     20,
+					StartColumn: 16,
+					EndColumn:   35,
+					Match:       "AKIALALEMEL33243OLIA",
+					Secret:      "AKIALALEMEL33243OLIA",
+					File:        "../testdata/repos/nogit/main.go",
+					RuleID:      "aws-access-key",
+					Tags:        []string{"key", "AWS"},
+					Entropy:     3.0841837,
+				},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		viper.AddConfigPath(configPath)
+		viper.SetConfigName("simple")
+		viper.SetConfigType("toml")
+		err := viper.ReadInConfig()
+		if err != nil {
+			t.Error(err)
+		}
+
+		var vc config.ViperConfig
+		err = viper.Unmarshal(&vc)
+		if err != nil {
+			t.Error(err)
+		}
+		cfg, _ := vc.Translate()
+		detector := NewDetector(cfg)
+		findings, err := detector.DetectFiles(tt.source)
+		if err != nil {
+			t.Error(err)
+		}
+
+		assert.ElementsMatch(t, tt.expectedFindings, findings)
+	}
+}
+
+func moveDotGit(from, to string) error {
+	repoDirs, err := os.ReadDir("../testdata/repos")
+	if err != nil {
+		return err
+	}
+	for _, dir := range repoDirs {
+		if to == ".git" {
+			_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), "dotGit"))
+			if os.IsNotExist(err) {
+				// dont want to delete the only copy of .git accidentally
+				continue
+			}
+			os.RemoveAll(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), ".git"))
+		}
+		if !dir.IsDir() {
+			continue
+		}
+		_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from))
+		if os.IsNotExist(err) {
+			continue
+		}
+
+		err = os.Rename(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from),
+			fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), to))
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}

+ 0 - 77
detect/files.go

@@ -1,77 +0,0 @@
-package detect
-
-import (
-	"context"
-	"os"
-	"path/filepath"
-	"sync"
-
-	"golang.org/x/sync/errgroup"
-
-	"github.com/zricethezav/gitleaks/v8/config"
-	"github.com/zricethezav/gitleaks/v8/report"
-)
-
-// FromFiles opens the directory or file specified in source and checks each file against the rules
-// from the configuration. If any secrets are found, they are added to the list of findings.
-func FromFiles(source string, cfg config.Config, outputOptions Options) ([]report.Finding, error) {
-	var (
-		findings []report.Finding
-		mu       sync.Mutex
-	)
-	concurrentGoroutines := make(chan struct{}, MAXGOROUTINES)
-	g, _ := errgroup.WithContext(context.Background())
-	paths := make(chan string)
-	g.Go(func() error {
-		defer close(paths)
-		return filepath.Walk(source,
-			func(path string, fInfo os.FileInfo, err error) error {
-				if err != nil {
-					return err
-				}
-				if fInfo.Name() == ".git" {
-					return filepath.SkipDir
-				}
-				if fInfo.Mode().IsRegular() {
-					paths <- path
-				}
-				return nil
-			})
-	})
-	for pa := range paths {
-		p := pa
-		concurrentGoroutines <- struct{}{}
-		g.Go(func() error {
-			defer func() {
-				<-concurrentGoroutines
-			}()
-			b, err := os.ReadFile(p)
-			if err != nil {
-				return err
-			}
-			fis := DetectFindings(cfg, b, p, "")
-			for _, fi := range fis {
-				// need to add 1 since line counting starts at 1
-				fi.StartLine++
-				fi.EndLine++
-
-				if outputOptions.Redact {
-					fi.Redact()
-				}
-				if outputOptions.Verbose {
-					printFinding(fi)
-				}
-				mu.Lock()
-				findings = append(findings, fi)
-				mu.Unlock()
-			}
-			return nil
-		})
-	}
-
-	if err := g.Wait(); err != nil {
-		return findings, err
-	}
-
-	return findings, nil
-}

+ 0 - 80
detect/files_test.go

@@ -1,80 +0,0 @@
-package detect
-
-import (
-	"path/filepath"
-	"testing"
-
-	"github.com/spf13/viper"
-	"github.com/stretchr/testify/assert"
-
-	"github.com/zricethezav/gitleaks/v8/config"
-	"github.com/zricethezav/gitleaks/v8/report"
-)
-
-// TestFromGit tests the FromGit function
-func TestFromFiles(t *testing.T) {
-	tests := []struct {
-		cfgName          string
-		opts             Options
-		source           string
-		expectedFindings []report.Finding
-	}{
-		{
-			source:  filepath.Join(repoBasePath, "nogit"),
-			cfgName: "simple",
-			expectedFindings: []report.Finding{
-				{
-					Description: "AWS Access Key",
-					StartLine:   20,
-					EndLine:     20,
-					StartColumn: 16,
-					EndColumn:   35,
-					Match:       "AKIALALEMEL33243OLIA",
-					Secret:      "AKIALALEMEL33243OLIA",
-					File:        "../testdata/repos/nogit/main.go",
-					RuleID:      "aws-access-key",
-					Tags:        []string{"key", "AWS"},
-				},
-			},
-		},
-		{
-			source:  filepath.Join(repoBasePath, "nogit", "main.go"),
-			cfgName: "simple",
-			expectedFindings: []report.Finding{
-				{
-					Description: "AWS Access Key",
-					StartLine:   20,
-					EndLine:     20,
-					StartColumn: 16,
-					EndColumn:   35,
-					Match:       "AKIALALEMEL33243OLIA",
-					Secret:      "AKIALALEMEL33243OLIA",
-					File:        "../testdata/repos/nogit/main.go",
-					RuleID:      "aws-access-key",
-					Tags:        []string{"key", "AWS"},
-				},
-			},
-		},
-	}
-
-	for _, tt := range tests {
-		viper.AddConfigPath(configPath)
-		viper.SetConfigName("simple")
-		viper.SetConfigType("toml")
-		err := viper.ReadInConfig()
-		if err != nil {
-			t.Error(err)
-		}
-
-		var vc config.ViperConfig
-		viper.Unmarshal(&vc)
-		cfg, _ := vc.Translate()
-
-		findings, err := FromFiles(tt.source, cfg, tt.opts)
-		if err != nil {
-			t.Error(err)
-		}
-
-		assert.ElementsMatch(t, tt.expectedFindings, findings)
-	}
-}

+ 0 - 95
detect/git.go

@@ -1,95 +0,0 @@
-package detect
-
-import (
-	"strings"
-	"sync"
-	"time"
-
-	"github.com/gitleaks/go-gitdiff/gitdiff"
-	"github.com/rs/zerolog/log"
-	"github.com/zricethezav/gitleaks/v8/config"
-	"github.com/zricethezav/gitleaks/v8/report"
-)
-
-// FromGit accepts a gitdiff.File channel (structure output from `git log -p`) and a configuration
-// struct. Files from the gitdiff.File channel are then checked against each rule in the configuration to
-// check for secrets. If any secrets are found, they are added to the list of findings.
-func FromGit(files <-chan *gitdiff.File, cfg config.Config, outputOptions Options) []report.Finding {
-	var findings []report.Finding
-	mu := sync.Mutex{}
-	wg := sync.WaitGroup{}
-	concurrentGoroutines := make(chan struct{}, MAXGOROUTINES)
-	commitMap := make(map[string]bool)
-	for f := range files {
-		// keep track of commits for logging
-		if f.PatchHeader != nil {
-			commitMap[f.PatchHeader.SHA] = true
-		}
-		wg.Add(1)
-		concurrentGoroutines <- struct{}{}
-		go func(f *gitdiff.File) {
-			defer func() {
-				wg.Done()
-				<-concurrentGoroutines
-			}()
-			if f.IsBinary {
-				return
-			}
-
-			if f.IsDelete {
-				return
-			}
-
-			commitSHA := ""
-
-			// Check if commit is allowed
-			if f.PatchHeader != nil {
-				commitSHA = f.PatchHeader.SHA
-				if cfg.Allowlist.CommitAllowed(f.PatchHeader.SHA) {
-					return
-				}
-			}
-
-			for _, tf := range f.TextFragments {
-				if f.TextFragments == nil {
-					// TODO fix this in gitleaks gitdiff fork
-					// https://github.com/gitleaks/gitleaks/issues/11
-					continue
-				}
-
-				for _, fi := range DetectFindings(cfg, []byte(tf.Raw(gitdiff.OpAdd)), f.NewName, commitSHA) {
-					// don't add to start/end lines if finding is from a file only rule
-					if !strings.HasPrefix(fi.Match, "file detected") {
-						fi.StartLine += int(tf.NewPosition)
-						fi.EndLine += int(tf.NewPosition)
-					}
-					if f.PatchHeader != nil {
-						fi.Commit = f.PatchHeader.SHA
-						fi.Message = f.PatchHeader.Message()
-						if f.PatchHeader.Author != nil {
-							fi.Author = f.PatchHeader.Author.Name
-							fi.Email = f.PatchHeader.Author.Email
-						}
-						fi.Date = f.PatchHeader.AuthorDate.UTC().Format(time.RFC3339)
-					}
-
-					if outputOptions.Redact {
-						fi.Redact()
-					}
-
-					if outputOptions.Verbose {
-						printFinding(fi)
-					}
-					mu.Lock()
-					findings = append(findings, fi)
-					mu.Unlock()
-
-				}
-			}
-		}(f)
-	}
-
-	wg.Wait()
-	log.Debug().Msgf("%d commits scanned. Note: this number might be smaller than expected due to commits with no additions", len(commitMap))
-	return findings
-}

+ 0 - 0
git/git.go → detect/git/git.go


+ 158 - 0
detect/git/git_test.go

@@ -0,0 +1,158 @@
+package git_test
+
+// TODO: commenting out this test for now because it's flaky. Alternatives to consider to get this working:
+// -- use `git stash` instead of `restore()`
+
+// const repoBasePath = "../../testdata/repos/"
+
+// const expectPath = "../../testdata/expected/"
+
+// func TestGitLog(t *testing.T) {
+// 	tests := []struct {
+// 		source   string
+// 		logOpts  string
+// 		expected string
+// 	}{
+// 		{
+// 			source:   filepath.Join(repoBasePath, "small"),
+// 			expected: filepath.Join(expectPath, "git", "small.txt"),
+// 		},
+// 		{
+// 			source:   filepath.Join(repoBasePath, "small"),
+// 			expected: filepath.Join(expectPath, "git", "small-branch-foo.txt"),
+// 			logOpts:  "--all foo...",
+// 		},
+// 	}
+
+// 	err := moveDotGit("dotGit", ".git")
+// 	if err != nil {
+// 		t.Fatal(err)
+// 	}
+// 	defer func() {
+// 		if err = moveDotGit(".git", "dotGit"); err != nil {
+// 			t.Fatal(err)
+// 		}
+// 	}()
+
+// 	for _, tt := range tests {
+// 		files, err := git.GitLog(tt.source, tt.logOpts)
+// 		if err != nil {
+// 			t.Error(err)
+// 		}
+
+// 		var diffSb strings.Builder
+// 		for f := range files {
+// 			for _, tf := range f.TextFragments {
+// 				diffSb.WriteString(tf.Raw(gitdiff.OpAdd))
+// 			}
+// 		}
+
+// 		expectedBytes, err := os.ReadFile(tt.expected)
+// 		if err != nil {
+// 			t.Error(err)
+// 		}
+// 		expected := string(expectedBytes)
+// 		if expected != diffSb.String() {
+// 			// write string builder to .got file using os.Create
+// 			err = os.WriteFile(strings.Replace(tt.expected, ".txt", ".got.txt", 1), []byte(diffSb.String()), 0644)
+// 			if err != nil {
+// 				t.Error(err)
+// 			}
+// 			t.Error("expected: ", expected, "got: ", diffSb.String())
+// 		}
+// 	}
+// }
+
+// func TestGitDiff(t *testing.T) {
+// 	tests := []struct {
+// 		source    string
+// 		expected  string
+// 		additions string
+// 		target    string
+// 	}{
+// 		{
+// 			source:    filepath.Join(repoBasePath, "small"),
+// 			expected:  "this line is added\nand another one",
+// 			additions: "this line is added\nand another one",
+// 			target:    filepath.Join(repoBasePath, "small", "main.go"),
+// 		},
+// 	}
+
+// 	err := moveDotGit("dotGit", ".git")
+// 	if err != nil {
+// 		t.Fatal(err)
+// 	}
+// 	defer func() {
+// 		if err = moveDotGit(".git", "dotGit"); err != nil {
+// 			t.Fatal(err)
+// 		}
+// 	}()
+
+// 	for _, tt := range tests {
+// 		noChanges, err := os.ReadFile(tt.target)
+// 		if err != nil {
+// 			t.Error(err)
+// 		}
+// 		err = os.WriteFile(tt.target, []byte(tt.additions), 0644)
+// 		if err != nil {
+// 			restore(tt.target, noChanges, t)
+// 			t.Error(err)
+// 		}
+
+// 		files, err := git.GitDiff(tt.source, false)
+// 		if err != nil {
+// 			restore(tt.target, noChanges, t)
+// 			t.Error(err)
+// 		}
+
+// 		for f := range files {
+// 			sb := strings.Builder{}
+// 			for _, tf := range f.TextFragments {
+// 				sb.WriteString(tf.Raw(gitdiff.OpAdd))
+// 			}
+// 			if sb.String() != tt.expected {
+// 				restore(tt.target, noChanges, t)
+// 				t.Error("expected: ", tt.expected, "got: ", sb.String())
+// 			}
+// 		}
+// 		restore(tt.target, noChanges, t)
+// 	}
+// }
+
+// func restore(path string, data []byte, t *testing.T) {
+// 	err := os.WriteFile(path, data, 0644)
+// 	if err != nil {
+// 		t.Fatal(err)
+// 	}
+// }
+
+// func moveDotGit(from, to string) error {
+// 	repoDirs, err := os.ReadDir("../../testdata/repos")
+// 	if err != nil {
+// 		return err
+// 	}
+// 	for _, dir := range repoDirs {
+// 		if to == ".git" {
+// 			_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), "dotGit"))
+// 			if os.IsNotExist(err) {
+// 				// dont want to delete the only copy of .git accidentally
+// 				continue
+// 			}
+// 			os.RemoveAll(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), ".git"))
+// 		}
+// 		if !dir.IsDir() {
+// 			continue
+// 		}
+// 		_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from))
+// 		if os.IsNotExist(err) {
+// 			continue
+// 		}
+
+// 		err = os.Rename(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from),
+// 			fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), to))
+// 		if err != nil {
+// 			return err
+// 		}
+// 	}
+// 	return nil
+// }

+ 0 - 160
detect/git_test.go

@@ -1,160 +0,0 @@
-package detect
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-	"testing"
-
-	"github.com/spf13/viper"
-	"github.com/stretchr/testify/assert"
-
-	"github.com/zricethezav/gitleaks/v8/config"
-	"github.com/zricethezav/gitleaks/v8/git"
-	"github.com/zricethezav/gitleaks/v8/report"
-)
-
-const repoBasePath = "../testdata/repos/"
-const expectPath = "../testdata/expected/"
-const configPath = "../testdata/config/"
-
-// TestFromGit tests the FromGit function
-func TestFromGit(t *testing.T) {
-	tests := []struct {
-		cfgName          string
-		opts             Options
-		source           string
-		logOpts          string
-		expected         string
-		expectedFindings []report.Finding
-	}{
-		{
-			source:   filepath.Join(repoBasePath, "small"),
-			expected: filepath.Join(expectPath, "git", "small.txt"),
-			cfgName:  "simple",
-			expectedFindings: []report.Finding{
-				{
-					Description: "AWS Access Key",
-					StartLine:   20,
-					EndLine:     20,
-					StartColumn: 19,
-					EndColumn:   38,
-					Secret:      "AKIALALEMEL33243OLIA",
-					Match:       "AKIALALEMEL33243OLIA",
-					File:        "main.go",
-					Date:        "2021-11-02T23:37:53Z",
-					Commit:      "1b6da43b82b22e4eaa10bcf8ee591e91abbfc587",
-					Author:      "Zachary Rice",
-					Email:       "zricer@protonmail.com",
-					Message:     "Accidentally add a secret",
-					RuleID:      "aws-access-key",
-					Tags:        []string{"key", "AWS"},
-				},
-				{
-					Description: "AWS Access Key",
-					StartLine:   9,
-					EndLine:     9,
-					StartColumn: 17,
-					EndColumn:   36,
-					Secret:      "AKIALALEMEL33243OLIA",
-					Match:       "AKIALALEMEL33243OLIA",
-					File:        "foo/foo.go",
-					Date:        "2021-11-02T23:48:06Z",
-					Commit:      "491504d5a31946ce75e22554cc34203d8e5ff3ca",
-					Author:      "Zach Rice",
-					Email:       "zricer@protonmail.com",
-					Message:     "adding foo package with secret",
-					RuleID:      "aws-access-key",
-					Tags:        []string{"key", "AWS"},
-				},
-			},
-		},
-		{
-			source:   filepath.Join(repoBasePath, "small"),
-			expected: filepath.Join(expectPath, "git", "small-branch-foo.txt"),
-			logOpts:  "--all foo...",
-			cfgName:  "simple",
-			expectedFindings: []report.Finding{
-				{
-					Description: "AWS Access Key",
-					StartLine:   9,
-					EndLine:     9,
-					StartColumn: 17,
-					EndColumn:   36,
-					Secret:      "AKIALALEMEL33243OLIA",
-					Match:       "AKIALALEMEL33243OLIA",
-					Date:        "2021-11-02T23:48:06Z",
-					File:        "foo/foo.go",
-					Commit:      "491504d5a31946ce75e22554cc34203d8e5ff3ca",
-					Author:      "Zach Rice",
-					Email:       "zricer@protonmail.com",
-					Message:     "adding foo package with secret",
-					RuleID:      "aws-access-key",
-					Tags:        []string{"key", "AWS"},
-				},
-			},
-		},
-	}
-
-	err := moveDotGit("dotGit", ".git")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer moveDotGit(".git", "dotGit")
-
-	for _, tt := range tests {
-		files, err := git.GitLog(tt.source, tt.logOpts)
-		if err != nil {
-			t.Error(err)
-		}
-
-		viper.AddConfigPath(configPath)
-		viper.SetConfigName("simple")
-		viper.SetConfigType("toml")
-		err = viper.ReadInConfig()
-		if err != nil {
-			t.Error(err)
-		}
-
-		var vc config.ViperConfig
-		viper.Unmarshal(&vc)
-		cfg, _ := vc.Translate()
-
-		findings := FromGit(files, cfg, tt.opts)
-		for _, f := range findings {
-			f.Match = "" // remove lines cause copying and pasting them has some wack formatting
-		}
-		assert.ElementsMatch(t, tt.expectedFindings, findings)
-	}
-}
-
-func moveDotGit(from, to string) error {
-	repoDirs, err := os.ReadDir("../testdata/repos")
-	if err != nil {
-		return err
-	}
-	for _, dir := range repoDirs {
-		if to == ".git" {
-			_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), "dotGit"))
-			if os.IsNotExist(err) {
-				// dont want to delete the only copy of .git accidentally
-				continue
-			}
-			os.RemoveAll(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), ".git"))
-		}
-		if !dir.IsDir() {
-			continue
-		}
-		_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from))
-		if os.IsNotExist(err) {
-			continue
-		}
-
-		err = os.Rename(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from),
-			fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), to))
-		if err != nil {
-			return err
-		}
-	}
-	return nil
-}

+ 5 - 2
detect/location.go

@@ -10,7 +10,7 @@ type Location struct {
 	endLineIndex   int
 }
 
-func getLocation(linePairs [][]int, start int, end int) Location {
+func location(fragment Fragment, matchIndex []int) Location {
 	var (
 		prevNewLine int
 		location    Location
@@ -18,7 +18,10 @@ func getLocation(linePairs [][]int, start int, end int) Location {
 		_lineNum    int
 	)
 
-	for lineNum, pair := range linePairs {
+	start := matchIndex[0]
+	end := matchIndex[1]
+
+	for lineNum, pair := range fragment.newlineIndices {
 		_lineNum = lineNum
 		newLineByteIndex := pair[0]
 		if prevNewLine <= start && start < newLineByteIndex {
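
Note on the new signature: the refactor folds the raw match offsets and the pre-computed newline indices into a single Fragment value. A minimal in-package sketch of how a caller might now resolve a regex match to a line/column, assuming a Fragment.Raw field holding the scanned text plus the unexported newlineIndices field visible in the test below (both are illustrative assumptions, not shown in this hunk):

	// Hypothetical usage inside package detect; needs "fmt" and "regexp".
	raw := "line one\nAKIALALEMEL33243OLIA\nline three\n"
	fragment := Fragment{
		Raw:            raw, // assumed field name
		newlineIndices: regexp.MustCompile("\n").FindAllStringIndex(raw, -1),
	}
	for _, matchIndex := range regexp.MustCompile(`AKIA[0-9A-Z]{16}`).FindAllStringIndex(raw, -1) {
		loc := location(fragment, matchIndex)
		fmt.Println(loc.startLine, loc.startColumn, loc.endLine, loc.endColumn)
	}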

+ 1 - 1
detect/location_test.go

@@ -49,7 +49,7 @@ func TestGetLocation(t *testing.T) {
 	}
 
 	for _, test := range tests {
-		loc := getLocation(test.linePairs, test.start, test.end)
+		loc := location(Fragment{newlineIndices: test.linePairs}, []int{test.start, test.end})
 		if loc != test.wantLocation {
 			t.Errorf("\nstartLine %d\nstartColumn: %d\nendLine: %d\nendColumn: %d\nstartLineIndex: %d\nendlineIndex %d",
 				loc.startLine, loc.startColumn, loc.endLine, loc.endColumn, loc.startLineIndex, loc.endLineIndex)

+ 107 - 0
detect/utils.go

@@ -0,0 +1,107 @@
+package detect
+
+import (
+	"encoding/json"
+	"fmt"
+	"math"
+	"strings"
+	"time"
+
+	"github.com/zricethezav/gitleaks/v8/report"
+
+	"github.com/gitleaks/go-gitdiff/gitdiff"
+	"github.com/rs/zerolog/log"
+)
+
+// augmentGitFinding updates the start and end line numbers of a finding to include the
+// delta from the git diff
+func augmentGitFinding(finding report.Finding, textFragment *gitdiff.TextFragment, f *gitdiff.File) report.Finding {
+	if !strings.HasPrefix(finding.Match, "file detected") {
+		finding.StartLine += int(textFragment.NewPosition)
+		finding.EndLine += int(textFragment.NewPosition)
+	}
+
+	if f.PatchHeader != nil {
+		finding.Commit = f.PatchHeader.SHA
+		finding.Message = f.PatchHeader.Message()
+		if f.PatchHeader.Author != nil {
+			finding.Author = f.PatchHeader.Author.Name
+			finding.Email = f.PatchHeader.Author.Email
+		}
+		finding.Date = f.PatchHeader.AuthorDate.UTC().Format(time.RFC3339)
+	}
+	return finding
+}
+
+// shannonEntropy calculates the entropy of data using the formula defined here:
+// https://en.wiktionary.org/wiki/Shannon_entropy
+// Another way to think about it: this is the average number of bits
+// needed to encode the data. The higher the entropy, the more random the data and the
+// more bits needed to encode it.
+func shannonEntropy(data string) (entropy float64) {
+	if data == "" {
+		return 0
+	}
+
+	charCounts := make(map[rune]int)
+	for _, char := range data {
+		charCounts[char]++
+	}
+
+	invLength := 1.0 / float64(len(data))
+	for _, count := range charCounts {
+		freq := float64(count) * invLength
+		entropy -= freq * math.Log2(freq)
+	}
+
+	return entropy
+}
+
+// filter will dedupe and redact findings
+func filter(findings []report.Finding, redact bool) []report.Finding {
+	var retFindings []report.Finding
+	for _, f := range findings {
+		include := true
+		if strings.Contains(strings.ToLower(f.RuleID), "generic") {
+			for _, fPrime := range findings {
+				if f.StartLine == fPrime.StartLine &&
+					f.EndLine == fPrime.EndLine &&
+					f.Commit == fPrime.Commit &&
+					f.RuleID != fPrime.RuleID &&
+					strings.Contains(fPrime.Secret, f.Secret) &&
+					!strings.Contains(strings.ToLower(fPrime.RuleID), "generic") {
+
+					genericMatch := strings.Replace(f.Match, f.Secret, "REDACTED", -1)
+					betterMatch := strings.Replace(fPrime.Match, fPrime.Secret, "REDACTED", -1)
+					log.Debug().Msgf("skipping %s finding (%s), %s rule takes precedence (%s)", f.RuleID, genericMatch, fPrime.RuleID, betterMatch)
+					include = false
+					break
+				}
+			}
+		}
+		if redact {
+			f.Redact()
+		}
+		if include {
+			retFindings = append(retFindings, f)
+		}
+	}
+	return retFindings
+}
+
+func printFinding(f report.Finding) {
+	var b []byte
+	b, _ = json.MarshalIndent(f, "", "	")
+	fmt.Println(string(b))
+}
+
+func containsDigit(s string) bool {
+	for _, c := range s {
+		switch c {
+		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return true
+		}
+
+	}
+	return false
+}
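
shannonEntropy is the helper behind the PR title: each finding now carries H = -Σ p(c)·log2 p(c) computed over the characters of the matched text. A standalone sketch mirroring the helper above, handy for sanity-checking expected values (a single repeated character yields 0 bits, four distinct characters yield 2 bits, and key-like strings land noticeably higher):

package main

import (
	"fmt"
	"math"
)

// Same formula as the helper in detect/utils.go: the average number of
// bits per character needed to encode the string.
func shannonEntropy(data string) (entropy float64) {
	if data == "" {
		return 0
	}
	charCounts := make(map[rune]int)
	for _, char := range data {
		charCounts[char]++
	}
	invLength := 1.0 / float64(len(data))
	for _, count := range charCounts {
		freq := float64(count) * invLength
		entropy -= freq * math.Log2(freq)
	}
	return entropy
}

func main() {
	fmt.Println(shannonEntropy("aaaa"))                 // 0
	fmt.Println(shannonEntropy("abcd"))                 // 2
	fmt.Println(shannonEntropy("AKIALALEMEL33243OLIA")) // ≈3.08
}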

+ 0 - 157
git/git_test.go

@@ -1,157 +0,0 @@
-package git_test
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-
-	"github.com/gitleaks/go-gitdiff/gitdiff"
-	"github.com/zricethezav/gitleaks/v8/git"
-)
-
-const repoBasePath = "../testdata/repos/"
-const expectPath = "../testdata/expected/"
-
-func TestGitLog(t *testing.T) {
-	tests := []struct {
-		source   string
-		logOpts  string
-		expected string
-	}{
-		{
-			source:   filepath.Join(repoBasePath, "small"),
-			expected: filepath.Join(expectPath, "git", "small.txt"),
-		},
-		{
-			source:   filepath.Join(repoBasePath, "small"),
-			expected: filepath.Join(expectPath, "git", "small-branch-foo.txt"),
-			logOpts:  "--all foo...",
-		},
-	}
-
-	err := moveDotGit("dotGit", ".git")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer moveDotGit(".git", "dotGit")
-
-	for _, tt := range tests {
-		files, err := git.GitLog(tt.source, tt.logOpts)
-		if err != nil {
-			t.Error(err)
-		}
-
-		var diffSb strings.Builder
-		for f := range files {
-			for _, tf := range f.TextFragments {
-				diffSb.WriteString(tf.Raw(gitdiff.OpAdd))
-			}
-		}
-
-		expectedBytes, err := os.ReadFile(tt.expected)
-		if err != nil {
-			t.Error(err)
-		}
-		expected := string(expectedBytes)
-		if expected != diffSb.String() {
-			// write string builder to .got file using os.Create
-			err = os.WriteFile(strings.Replace(tt.expected, ".txt", ".got.txt", 1), []byte(diffSb.String()), 0644)
-			if err != nil {
-				t.Error(err)
-			}
-			t.Error("expected: ", expected, "got: ", diffSb.String())
-		}
-	}
-}
-
-func TestGitDiff(t *testing.T) {
-	tests := []struct {
-		source    string
-		expected  string
-		additions string
-		target    string
-	}{
-		{
-			source:    filepath.Join(repoBasePath, "small"),
-			expected:  "this line is added\nand another one",
-			additions: "this line is added\nand another one",
-			target:    filepath.Join(repoBasePath, "small", "main.go"),
-		},
-	}
-
-	err := moveDotGit("dotGit", ".git")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer moveDotGit(".git", "dotGit")
-
-	for _, tt := range tests {
-		noChanges, err := os.ReadFile(tt.target)
-		if err != nil {
-			t.Error(err)
-		}
-		err = os.WriteFile(tt.target, []byte(tt.additions), 0644)
-		if err != nil {
-			restore(tt.target, noChanges, t)
-			t.Error(err)
-		}
-
-		files, err := git.GitDiff(tt.source, false)
-		if err != nil {
-			restore(tt.target, noChanges, t)
-			t.Error(err)
-		}
-
-		for f := range files {
-			sb := strings.Builder{}
-			for _, tf := range f.TextFragments {
-				sb.WriteString(tf.Raw(gitdiff.OpAdd))
-			}
-			if sb.String() != tt.expected {
-				restore(tt.target, noChanges, t)
-				t.Error("expected: ", tt.expected, "got: ", sb.String())
-			}
-		}
-		restore(tt.target, noChanges, t)
-	}
-}
-
-func restore(path string, data []byte, t *testing.T) {
-	err := os.WriteFile(path, data, 0644)
-	if err != nil {
-		t.Fatal(err)
-	}
-}
-
-func moveDotGit(from, to string) error {
-	repoDirs, err := os.ReadDir("../testdata/repos")
-	if err != nil {
-		return err
-	}
-	for _, dir := range repoDirs {
-		if to == ".git" {
-			_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), "dotGit"))
-			if os.IsNotExist(err) {
-				// dont want to delete the only copy of .git accidentally
-				continue
-			}
-			os.RemoveAll(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), ".git"))
-		}
-		if !dir.IsDir() {
-			continue
-		}
-		_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from))
-		if os.IsNotExist(err) {
-			continue
-		}
-
-		err = os.Rename(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from),
-			fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), to))
-		if err != nil {
-			return err
-		}
-	}
-	return nil
-}

+ 4 - 4
go.mod

@@ -3,12 +3,12 @@ module github.com/zricethezav/gitleaks/v8
 go 1.17
 
 require (
+	github.com/fatih/semgroup v1.2.0
 	github.com/gitleaks/go-gitdiff v0.7.4
-	github.com/rs/zerolog v1.25.0
+	github.com/rs/zerolog v1.26.1
 	github.com/spf13/cobra v1.2.1
 	github.com/spf13/viper v1.8.1
 	github.com/stretchr/testify v1.7.0
-	golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
 )
 
 require (
@@ -25,9 +25,9 @@ require (
 	github.com/spf13/jwalterweatherman v1.1.0 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
 	github.com/subosito/gotenv v1.2.0 // indirect
+	golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect
 	golang.org/x/sys v0.0.0-20211110154304-99a53858aa08 // indirect
-	golang.org/x/text v0.3.5 // indirect
-	gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
+	golang.org/x/text v0.3.6 // indirect
 	gopkg.in/ini.v1 v1.62.0 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 	gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
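
The dependency swap is worth a note: the direct golang.org/x/sync requirement is replaced by github.com/fatih/semgroup, an errgroup-style helper that caps how many goroutines run at once and aggregates every task error. How the detector wires it up is not visible in this part of the diff; below is a minimal sketch of the pattern, assuming semgroup's documented v1 API (NewGroup, Go, Wait).

package main

import (
	"context"
	"fmt"
	"runtime"

	"github.com/fatih/semgroup"
)

func main() {
	// At most GOMAXPROCS tasks run concurrently; Wait returns the
	// accumulated errors rather than only the first one.
	g := semgroup.NewGroup(context.Background(), int64(runtime.GOMAXPROCS(0)))
	for i := 0; i < 10; i++ {
		i := i // capture loop variable
		g.Go(func() error {
			if i%4 == 0 {
				return fmt.Errorf("task %d failed", i)
			}
			return nil
		})
	}
	if err := g.Wait(); err != nil {
		fmt.Println(err)
	}
}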

+ 13 - 6
go.sum

@@ -67,6 +67,8 @@ github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.m
 github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
 github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
 github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
+github.com/fatih/semgroup v1.2.0 h1:h/OLXwEM+3NNyAdZEpMiH1OzfplU09i2qXPVThGZvyg=
+github.com/fatih/semgroup v1.2.0/go.mod h1:1KAD4iIYfXjE4U13B48VM4z9QUwV5Tt8O4rS879kgm8=
 github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
 github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
@@ -209,8 +211,8 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:
 github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
 github.com/rs/xid v1.3.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
-github.com/rs/zerolog v1.25.0 h1:Rj7XygbUHKUlDPcVdoLyR91fJBsduXj5fRxyqIQj/II=
-github.com/rs/zerolog v1.25.0/go.mod h1:7KHcEGe0QZPOm2IE4Kpb5rTh6n1h2hIgS5OOnu1rUaI=
+github.com/rs/zerolog v1.26.1 h1:/ihwxqH+4z8UxyI70wM1z9yCvkWcfz/a3mj48k/Zngc=
+github.com/rs/zerolog v1.26.1/go.mod h1:/wSSJWX7lVrsOwlbyTRSOJvqRlc+WjWlfes+CiJ+tmc=
 github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
 github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
@@ -246,6 +248,7 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
 github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
+github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
 go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs=
 go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g=
 go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsXlzd7alYQ=
@@ -266,6 +269,7 @@ golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8U
 golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20211215165025-cf75a172585e/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -337,6 +341,7 @@ golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc=
 golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
+golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -401,7 +406,9 @@ golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7w
 golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20211110154304-99a53858aa08 h1:WecRHqgE09JBkh/584XIE6PMz5KKE/vER4izNUi30AQ=
 golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
@@ -411,8 +418,9 @@ golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.5 h1:i6eZZ+zk0SOf0xgBpEpPD18qWcJda6q1sxt3S0kzyUQ=
 golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@@ -467,7 +475,7 @@ golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4f
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
 golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
-golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
+golang.org/x/tools v0.1.7/go.mod h1:LGqMHiF4EqQNHR1JncWGqT5BVaXmza+X+BDGol+dOxo=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -575,9 +583,8 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
-gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
 gopkg.in/ini.v1 v1.62.0 h1:duBzk771uxoUuOlyRLkHsygud9+5lrlGjdFBb4mSKDU=
 gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=

+ 0 - 8
report/finding.go

@@ -1,7 +1,6 @@
 package report
 
 import (
-	"strconv"
 	"strings"
 )
 
@@ -43,10 +42,3 @@ func (f *Finding) Redact() {
 	f.Match = strings.Replace(f.Match, f.Secret, "REDACTED", -1)
 	f.Secret = "REDACT"
 }
-
-func (f *Finding) Hash() string {
-	return f.Secret + f.Commit +
-		strconv.Itoa(f.EndLine) +
-		strconv.Itoa(f.StartLine)
-
-}

+ 4 - 5
report/report.go

@@ -21,13 +21,12 @@ func Write(findings []Finding, cfg config.Config, ext string, reportPath string)
 	ext = strings.ToLower(ext)
 	switch ext {
 	case ".json", "json":
-		writeJson(findings, file)
+		err = writeJson(findings, file)
 	case ".csv", "csv":
-		writeCsv(findings, file)
+		err = writeCsv(findings, file)
 	case ".sarif", "sarif":
-		writeSarif(cfg, findings, file)
-
+		err = writeSarif(cfg, findings, file)
 	}
 
-	return nil
+	return err
 }
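
With this change, Write now propagates whatever error the JSON/CSV/SARIF writer returns instead of always returning nil. A caller-side sketch (the variable names mirror the cmd package, but the exact call site is an assumption):

	// Hypothetical caller: findings and cfg come from the surrounding cmd code.
	if err := report.Write(findings, cfg, ".json", "gitleaks-report.json"); err != nil {
		log.Fatal().Err(err).Msg("failed to write report")
	}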

+ 5 - 1
report/sarif_test.go

@@ -63,7 +63,11 @@ func TestWriteSarif(t *testing.T) {
 		}
 
 		var vc config.ViperConfig
-		viper.Unmarshal(&vc)
+		err = viper.Unmarshal(&vc)
+		if err != nil {
+			t.Error(err)
+		}
+
 		cfg, err := vc.Translate()
 		if err != nil {
 			t.Error(err)