Просмотр исходного кода

Refactor `detect`, add `entropy` to all findings (#804)

Refactor `detect`, add `entropy` to all findings
Zachary Rice 3 года назад
Родитель
Коммит
6e72472b60
24 изменённых файла: 1086 добавлено и 913 удалено
  1. 62 27
      cmd/detect.go
  2. 47 14
      cmd/protect.go
  3. 11 4
      cmd/root.go
  4. 17 11
      config/allowlist.go
  5. 4 51
      config/config_test.go
  6. 26 33
      config/rule.go
  7. 0 36
      config/utils.go
  8. 325 97
      detect/detect.go
  9. 297 44
      detect/detect_test.go
  10. 0 77
      detect/files.go
  11. 0 80
      detect/files_test.go
  12. 0 95
      detect/git.go
  13. 0 0
      detect/git/git.go
  14. 158 0
      detect/git/git_test.go
  15. 0 160
      detect/git_test.go
  16. 5 2
      detect/location.go
  17. 1 1
      detect/location_test.go
  18. 107 0
      detect/utils.go
  19. 0 157
      git/git_test.go
  20. 4 4
      go.mod
  21. 13 6
      go.sum
  22. 0 8
      report/finding.go
  23. 4 5
      report/report.go
  24. 5 1
      report/sarif_test.go

+ 62 - 27
cmd/detect.go

@@ -11,7 +11,6 @@ import (
 
 
 	"github.com/zricethezav/gitleaks/v8/config"
 	"github.com/zricethezav/gitleaks/v8/config"
 	"github.com/zricethezav/gitleaks/v8/detect"
 	"github.com/zricethezav/gitleaks/v8/detect"
-	"github.com/zricethezav/gitleaks/v8/git"
 	"github.com/zricethezav/gitleaks/v8/report"
 	"github.com/zricethezav/gitleaks/v8/report"
 )
 )
 
 
@@ -35,56 +34,92 @@ func runDetect(cmd *cobra.Command, args []string) {
 		err      error
 		err      error
 	)
 	)
 
 
-	viper.Unmarshal(&vc)
+	// Load config
+	if err = viper.Unmarshal(&vc); err != nil {
+		log.Fatal().Err(err).Msg("Failed to load config")
+	}
 	cfg, err := vc.Translate()
 	cfg, err := vc.Translate()
 	if err != nil {
 	if err != nil {
 		log.Fatal().Err(err).Msg("Failed to load config")
 		log.Fatal().Err(err).Msg("Failed to load config")
 	}
 	}
-
 	cfg.Path, _ = cmd.Flags().GetString("config")
 	cfg.Path, _ = cmd.Flags().GetString("config")
-	source, _ := cmd.Flags().GetString("source")
-	logOpts, _ := cmd.Flags().GetString("log-opts")
-	verbose, _ := cmd.Flags().GetBool("verbose")
-	redact, _ := cmd.Flags().GetBool("redact")
-	noGit, _ := cmd.Flags().GetBool("no-git")
-	exitCode, _ := cmd.Flags().GetInt("exit-code")
-	if cfg.Path == "" {
-		cfg.Path = filepath.Join(source, ".gitleaks.toml")
-	}
+
+	// start timer
 	start := time.Now()
 	start := time.Now()
 
 
+	// Setup detector
+	detector := detect.NewDetector(cfg)
+	detector.Config.Path, err = cmd.Flags().GetString("config")
+	if err != nil {
+		log.Fatal().Err(err)
+	}
+	source, err := cmd.Flags().GetString("source")
+	if err != nil {
+		log.Fatal().Err(err)
+	}
+	// if config path is not set, then use the {source}/.gitleaks.toml path.
+	// note that there may not be a `{source}/.gitleaks.toml` file, this is ok.
+	if detector.Config.Path == "" {
+		detector.Config.Path = filepath.Join(source, ".gitleaks.toml")
+	}
+	// set verbose flag
+	if detector.Verbose, err = cmd.Flags().GetBool("verbose"); err != nil {
+		log.Fatal().Err(err)
+	}
+	// set redact flag
+	if detector.Redact, err = cmd.Flags().GetBool("redact"); err != nil {
+		log.Fatal().Err(err)
+	}
+
+	// set exit code
+	exitCode, err := cmd.Flags().GetInt("exit-code")
+	if err != nil {
+		log.Fatal().Err(err)
+	}
+
+	// determine what type of scan:
+	// - git: scan the history of the repo
+	// - no-git: scan files by treating the repo as a plain directory
+	noGit, err := cmd.Flags().GetBool("no-git")
+	if err != nil {
+		log.Fatal().Err(err)
+	}
+
+	// start the detector scan
 	if noGit {
 	if noGit {
-		if logOpts != "" {
-			log.Fatal().Err(err).Msg("--log-opts cannot be used with --no-git")
-		}
-		findings, err = detect.FromFiles(source, cfg, detect.Options{
-			Verbose: verbose,
-			Redact:  redact,
-		})
+		findings, err = detector.DetectFiles(source)
 		if err != nil {
 		if err != nil {
-			log.Fatal().Err(err).Msg("Failed to scan files")
+			// don't exit on error, just log it
+			log.Error().Err(err)
 		}
 		}
+
 	} else {
 	} else {
-		files, err := git.GitLog(source, logOpts)
+		logOpts, err := cmd.Flags().GetString("log-opts")
 		if err != nil {
 		if err != nil {
-			log.Fatal().Err(err).Msg("Failed to get git log")
+			log.Fatal().Err(err)
+		}
+		findings, err = detector.DetectGit(source, logOpts, detect.DetectType)
+		if err != nil {
+			// don't exit on error, just log it
+			log.Error().Err(err)
 		}
 		}
-
-		findings = detect.FromGit(files, cfg, detect.Options{Verbose: verbose, Redact: redact})
 	}
 	}
 
 
+	// log info about the scan
+	log.Info().Msgf("scan completed in %s", time.Since(start))
 	if len(findings) != 0 {
 	if len(findings) != 0 {
 		log.Warn().Msgf("leaks found: %d", len(findings))
 		log.Warn().Msgf("leaks found: %d", len(findings))
 	} else {
 	} else {
 		log.Info().Msg("no leaks found")
 		log.Info().Msg("no leaks found")
 	}
 	}
 
 
-	log.Info().Msgf("scan completed in %s", time.Since(start))
-
+	// write report if desired
 	reportPath, _ := cmd.Flags().GetString("report-path")
 	reportPath, _ := cmd.Flags().GetString("report-path")
 	ext, _ := cmd.Flags().GetString("report-format")
 	ext, _ := cmd.Flags().GetString("report-format")
 	if reportPath != "" {
 	if reportPath != "" {
-		report.Write(findings, cfg, ext, reportPath)
+		if err = report.Write(findings, cfg, ext, reportPath); err != nil {
+			log.Fatal().Err(err)
+		}
 	}
 	}
 
 
 	if len(findings) != 0 {
 	if len(findings) != 0 {

+ 47 - 14
cmd/protect.go

@@ -11,7 +11,6 @@ import (
 
 
 	"github.com/zricethezav/gitleaks/v8/config"
 	"github.com/zricethezav/gitleaks/v8/config"
 	"github.com/zricethezav/gitleaks/v8/detect"
 	"github.com/zricethezav/gitleaks/v8/detect"
-	"github.com/zricethezav/gitleaks/v8/git"
 	"github.com/zricethezav/gitleaks/v8/report"
 	"github.com/zricethezav/gitleaks/v8/report"
 )
 )
 
 
@@ -30,41 +29,75 @@ func runProtect(cmd *cobra.Command, args []string) {
 	initConfig()
 	initConfig()
 	var vc config.ViperConfig
 	var vc config.ViperConfig
 
 
-	viper.Unmarshal(&vc)
+	if err := viper.Unmarshal(&vc); err != nil {
+		log.Fatal().Err(err).Msg("Failed to load config")
+	}
 	cfg, err := vc.Translate()
 	cfg, err := vc.Translate()
 	if err != nil {
 	if err != nil {
 		log.Fatal().Err(err).Msg("Failed to load config")
 		log.Fatal().Err(err).Msg("Failed to load config")
 	}
 	}
 
 
 	cfg.Path, _ = cmd.Flags().GetString("config")
 	cfg.Path, _ = cmd.Flags().GetString("config")
-	source, _ := cmd.Flags().GetString("source")
-	verbose, _ := cmd.Flags().GetBool("verbose")
-	redact, _ := cmd.Flags().GetBool("redact")
 	exitCode, _ := cmd.Flags().GetInt("exit-code")
 	exitCode, _ := cmd.Flags().GetInt("exit-code")
 	staged, _ := cmd.Flags().GetBool("staged")
 	staged, _ := cmd.Flags().GetBool("staged")
-	if cfg.Path == "" {
-		cfg.Path = filepath.Join(source, ".gitleaks.toml")
-	}
 	start := time.Now()
 	start := time.Now()
 
 
-	files, err := git.GitDiff(source, staged)
+	// Setup detector
+	detector := detect.NewDetector(cfg)
+	detector.Config.Path, err = cmd.Flags().GetString("config")
+	if err != nil {
+		log.Fatal().Err(err)
+	}
+	source, err := cmd.Flags().GetString("source")
 	if err != nil {
 	if err != nil {
-		log.Fatal().Err(err).Msg("Failed to get git log")
+		log.Fatal().Err(err)
+	}
+	// if config path is not set, then use the {source}/.gitleaks.toml path.
+	// note that there may not be a `{source}/.gitleaks.toml` file, this is ok.
+	if detector.Config.Path == "" {
+		detector.Config.Path = filepath.Join(source, ".gitleaks.toml")
+	}
+	// set verbose flag
+	if detector.Verbose, err = cmd.Flags().GetBool("verbose"); err != nil {
+		log.Fatal().Err(err)
+	}
+	// set redact flag
+	if detector.Redact, err = cmd.Flags().GetBool("redact"); err != nil {
+		log.Fatal().Err(err)
 	}
 	}
 
 
-	findings := detect.FromGit(files, cfg, detect.Options{Verbose: verbose, Redact: redact})
+	// get log options for git scan
+	logOpts, err := cmd.Flags().GetString("log-opts")
+	if err != nil {
+		log.Fatal().Err(err)
+	}
+
+	// start git scan
+	var findings []report.Finding
+	if staged {
+		findings, err = detector.DetectGit(source, logOpts, detect.ProtectStagedType)
+	} else {
+		findings, err = detector.DetectGit(source, logOpts, detect.ProtectType)
+	}
+	if err != nil {
+		// don't exit on error, just log it
+		log.Error().Err(err)
+	}
+
+	// log info about the scan
+	log.Info().Msgf("scan completed in %s", time.Since(start))
 	if len(findings) != 0 {
 	if len(findings) != 0 {
 		log.Warn().Msgf("leaks found: %d", len(findings))
 		log.Warn().Msgf("leaks found: %d", len(findings))
 	} else {
 	} else {
 		log.Info().Msg("no leaks found")
 		log.Info().Msg("no leaks found")
 	}
 	}
 
 
-	log.Info().Msgf("scan duration: %s", time.Since(start))
-
 	reportPath, _ := cmd.Flags().GetString("report-path")
 	reportPath, _ := cmd.Flags().GetString("report-path")
 	ext, _ := cmd.Flags().GetString("report-format")
 	ext, _ := cmd.Flags().GetString("report-format")
 	if reportPath != "" {
 	if reportPath != "" {
-		report.Write(findings, cfg, ext, reportPath)
+		if err = report.Write(findings, cfg, ext, reportPath); err != nil {
+			log.Fatal().Err(err)
+		}
 	}
 	}
 	if len(findings) != 0 {
 	if len(findings) != 0 {
 		os.Exit(exitCode)
 		os.Exit(exitCode)

+ 11 - 4
cmd/root.go

@@ -45,7 +45,10 @@ func init() {
 	rootCmd.PersistentFlags().StringP("log-level", "l", "info", "log level (debug, info, warn, error, fatal)")
 	rootCmd.PersistentFlags().StringP("log-level", "l", "info", "log level (debug, info, warn, error, fatal)")
 	rootCmd.PersistentFlags().BoolP("verbose", "v", false, "show verbose output from scan")
 	rootCmd.PersistentFlags().BoolP("verbose", "v", false, "show verbose output from scan")
 	rootCmd.PersistentFlags().Bool("redact", false, "redact secrets from logs and stdout")
 	rootCmd.PersistentFlags().Bool("redact", false, "redact secrets from logs and stdout")
-	viper.BindPFlag("config", rootCmd.PersistentFlags().Lookup("config"))
+	err := viper.BindPFlag("config", rootCmd.PersistentFlags().Lookup("config"))
+	if err != nil {
+		log.Fatal().Msgf("err binding config %s", err.Error())
+	}
 }
 }
 
 
 func initLog() {
 func initLog() {
@@ -71,7 +74,7 @@ func initLog() {
 }
 }
 
 
 func initConfig() {
 func initConfig() {
-	fmt.Fprintf(os.Stderr, banner)
+	fmt.Fprint(os.Stderr, banner)
 	cfgPath, err := rootCmd.Flags().GetString("config")
 	cfgPath, err := rootCmd.Flags().GetString("config")
 	if err != nil {
 	if err != nil {
 		log.Fatal().Msg(err.Error())
 		log.Fatal().Msg(err.Error())
@@ -97,14 +100,18 @@ func initConfig() {
 			log.Debug().Msgf("Unable to load gitleaks config from %s since --source=%s is a file, using default config",
 			log.Debug().Msgf("Unable to load gitleaks config from %s since --source=%s is a file, using default config",
 				filepath.Join(source, ".gitleaks.toml"), source)
 				filepath.Join(source, ".gitleaks.toml"), source)
 			viper.SetConfigType("toml")
 			viper.SetConfigType("toml")
-			viper.ReadConfig(strings.NewReader(config.DefaultConfig))
+			if err = viper.ReadConfig(strings.NewReader(config.DefaultConfig)); err != nil {
+				log.Fatal().Msgf("err reading toml %s", err.Error())
+			}
 			return
 			return
 		}
 		}
 
 
 		if _, err := os.Stat(filepath.Join(source, ".gitleaks.toml")); os.IsNotExist(err) {
 		if _, err := os.Stat(filepath.Join(source, ".gitleaks.toml")); os.IsNotExist(err) {
 			log.Debug().Msgf("No gitleaks config found in path %s, using default gitleaks config", filepath.Join(source, ".gitleaks.toml"))
 			log.Debug().Msgf("No gitleaks config found in path %s, using default gitleaks config", filepath.Join(source, ".gitleaks.toml"))
 			viper.SetConfigType("toml")
 			viper.SetConfigType("toml")
-			viper.ReadConfig(strings.NewReader(config.DefaultConfig))
+			if err = viper.ReadConfig(strings.NewReader(config.DefaultConfig)); err != nil {
+				log.Fatal().Msgf("err reading default config toml %s", err.Error())
+			}
 			return
 			return
 		} else {
 		} else {
 			log.Debug().Msgf("Using existing gitleaks config %s from `(--source)/.gitleaks.toml`", filepath.Join(source, ".gitleaks.toml"))
 			log.Debug().Msgf("Using existing gitleaks config %s from `(--source)/.gitleaks.toml`", filepath.Join(source, ".gitleaks.toml"))

+ 17 - 11
config/allowlist.go

@@ -2,13 +2,23 @@ package config
 
 
 import "regexp"
 import "regexp"
 
 
+// Allowlist allows a rule to be ignored for specific
+// regexes, paths, and/or commits
 type Allowlist struct {
 type Allowlist struct {
+	// Short human readable description of the allowlist.
 	Description string
 	Description string
-	Regexes     []*regexp.Regexp
-	Paths       []*regexp.Regexp
-	Commits     []string
+
+	// Regexes is a slice of content regular expressions that are allowed to be ignored.
+	Regexes []*regexp.Regexp
+
+	// Paths is a slice of path regular expressions that are allowed to be ignored.
+	Paths []*regexp.Regexp
+
+	// Commits is a slice of commit SHAs that are allowed to be ignored.
+	Commits []string
 }
 }
 
 
+// CommitAllowed returns true if the commit is allowed to be ignored.
 func (a *Allowlist) CommitAllowed(c string) bool {
 func (a *Allowlist) CommitAllowed(c string) bool {
 	if c == "" {
 	if c == "" {
 		return false
 		return false
@@ -21,16 +31,12 @@ func (a *Allowlist) CommitAllowed(c string) bool {
 	return false
 	return false
 }
 }
 
 
+// PathAllowed returns true if the path is allowed to be ignored.
 func (a *Allowlist) PathAllowed(path string) bool {
 func (a *Allowlist) PathAllowed(path string) bool {
-	if anyRegexMatch(path, a.Paths) {
-		return true
-	}
-	return false
+	return anyRegexMatch(path, a.Paths)
 }
 }
 
 
+// RegexAllowed returns true if the regex is allowed to be ignored.
 func (a *Allowlist) RegexAllowed(s string) bool {
 func (a *Allowlist) RegexAllowed(s string) bool {
-	if anyRegexMatch(s, a.Regexes) {
-		return true
-	}
-	return false
+	return anyRegexMatch(s, a.Regexes)
 }
 }

+ 4 - 51
config/config_test.go

@@ -103,7 +103,10 @@ func TestTranslate(t *testing.T) {
 		}
 		}
 
 
 		var vc ViperConfig
 		var vc ViperConfig
-		viper.Unmarshal(&vc)
+		err = viper.Unmarshal(&vc)
+		if err != nil {
+			t.Error(err)
+		}
 		cfg, err := vc.Translate()
 		cfg, err := vc.Translate()
 		if tt.wantError != nil {
 		if tt.wantError != nil {
 			if err == nil {
 			if err == nil {
@@ -115,53 +118,3 @@ func TestTranslate(t *testing.T) {
 		assert.Equal(t, cfg.Rules, tt.cfg.Rules)
 		assert.Equal(t, cfg.Rules, tt.cfg.Rules)
 	}
 	}
 }
 }
-
-func TestIncludeEntropy(t *testing.T) {
-	tests := []struct {
-		rule    Rule
-		secret  string
-		entropy float32
-		include bool
-	}{
-		{
-			rule: Rule{
-				RuleID:      "generic-api-key",
-				SecretGroup: 4,
-				Entropy:     3.5,
-				Regex:       regexp.MustCompile(`(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]`),
-			},
-			secret:  `e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5`,
-			entropy: 3.7906235872459746,
-			include: true,
-		},
-		{
-			rule: Rule{
-				RuleID:      "generic-api-key",
-				SecretGroup: 4,
-				Entropy:     4,
-				Regex:       regexp.MustCompile(`(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]`),
-			},
-			secret:  `e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5`,
-			entropy: 3.7906235872459746,
-			include: false,
-		},
-		{
-			rule: Rule{
-				RuleID:      "generic-api-key",
-				SecretGroup: 4,
-				Entropy:     3.0,
-				Regex:       regexp.MustCompile(`(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]`),
-			},
-			secret:  `ssh-keyboard-interactive`,
-			entropy: 0,
-			include: false,
-		},
-	}
-
-	for _, tt := range tests {
-		include, entropy := tt.rule.IncludeEntropy(tt.secret)
-		assert.Equal(t, true, tt.rule.EntropySet())
-		assert.Equal(t, tt.entropy, float32(entropy))
-		assert.Equal(t, tt.include, include)
-	}
-}

+ 26 - 33
config/rule.go

@@ -2,44 +2,37 @@ package config
 
 
 import (
 import (
 	"regexp"
 	"regexp"
-	"strings"
 )
 )
 
 
+// Rule contains information that defines how to detect secrets.
 type Rule struct {
 type Rule struct {
+	// Description is the description of the rule.
 	Description string
 	Description string
-	RuleID      string
-	Entropy     float64
+
+	// RuleID is a unique identifier for this rule
+	RuleID string
+
+	// Entropy is a float representing the minimum shannon
+	// entropy a regex group must have to be considered a secret.
+	Entropy float64
+
+	// SecretGroup is an int used to extract secret from regex
+	// match and used as the group that will have its entropy
+	// checked if `entropy` is set.
 	SecretGroup int
 	SecretGroup int
-	Regex       *regexp.Regexp
-	Path        *regexp.Regexp
-	Tags        []string
-	Allowlist   Allowlist
-}
 
 
-func (r *Rule) IncludeEntropy(secret string) (bool, float64) {
-	// NOTE: this is a goofy hack to get around the fact there golang's regex engine
-	// does not support positive lookaheads. Ideally we would want to add a
-	// restriction on generic rules regex that requires the secret match group
-	// contains both numbers and alphabetical characters. What this bit of code does is
-	// check if the ruleid is prepended with "generic" and enforces the
-	// secret contains both digits and alphabetical characters.
-	if strings.HasPrefix(r.RuleID, "generic") {
-		if !containsDigit(secret) {
-			return false, 0.0
-		}
-	}
-	// group = 0 will check the entropy of the whole regex match
-	e := shannonEntropy(secret)
-	if e > r.Entropy {
-		return true, e
-	}
-
-	return false, e
-}
+	// Regex is a golang regular expression used to detect secrets.
+	Regex *regexp.Regexp
+
+	// Path is a golang regular expression used to
+	// filter secrets by path
+	Path *regexp.Regexp
+
+	// Tags is an array of strings used for metadata
+	// and reporting purposes.
+	Tags []string
 
 
-func (r *Rule) EntropySet() bool {
-	if r.Entropy == 0.0 {
-		return false
-	}
-	return true
+	// Allowlist allows a rule to be ignored for specific
+	// regexes, paths, and/or commits
+	Allowlist Allowlist
 }
 }

+ 0 - 36
config/utils.go

@@ -1,7 +1,6 @@
 package config
 package config
 
 
 import (
 import (
-	"math"
 	"regexp"
 	"regexp"
 )
 )
 
 
@@ -23,38 +22,3 @@ func regexMatched(f string, re *regexp.Regexp) bool {
 	}
 	}
 	return false
 	return false
 }
 }
-
-func containsDigit(s string) bool {
-	for _, c := range s {
-		switch c {
-		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
-			return true
-		}
-
-	}
-	return false
-}
-
-// shannonEntropy calculates the entropy of data using the formula defined here:
-// https://en.wiktionary.org/wiki/Shannon_entropy
-// Another way to think about what this is doing is calculating the number of bits
-// needed to on average encode the data. So, the higher the entropy, the more random the data, the
-// more bits needed to encode that data.
-func shannonEntropy(data string) (entropy float64) {
-	if data == "" {
-		return 0
-	}
-
-	charCounts := make(map[rune]int)
-	for _, char := range data {
-		charCounts[char]++
-	}
-
-	invLength := 1.0 / float64(len(data))
-	for _, count := range charCounts {
-		freq := float64(count) * invLength
-		entropy -= freq * math.Log2(freq)
-	}
-
-	return entropy
-}

+ 325 - 97
detect/detect.go

@@ -1,144 +1,372 @@
 package detect
 package detect
 
 
 import (
 import (
-	"encoding/json"
+	"context"
 	"fmt"
 	"fmt"
+	"os"
+	"path/filepath"
 	"regexp"
 	"regexp"
 	"strings"
 	"strings"
-
-	"github.com/rs/zerolog/log"
+	"sync"
 
 
 	"github.com/zricethezav/gitleaks/v8/config"
 	"github.com/zricethezav/gitleaks/v8/config"
+	"github.com/zricethezav/gitleaks/v8/detect/git"
 	"github.com/zricethezav/gitleaks/v8/report"
 	"github.com/zricethezav/gitleaks/v8/report"
+
+	"github.com/fatih/semgroup"
+	"github.com/gitleaks/go-gitdiff/gitdiff"
+	"github.com/rs/zerolog/log"
+	"github.com/spf13/viper"
 )
 )
 
 
-type Options struct {
+// GitScanType is used to differentiate between git scan types:
+// $ gitleaks detect
+// $ gitleaks protect
+// $ gitleaks protect --staged
+type GitScanType int
+
+const (
+	DetectType GitScanType = iota
+	ProtectType
+	ProtectStagedType
+)
+
+// Detector is the main detector struct
+type Detector struct {
+	// Config is the configuration for the detector
+	Config config.Config
+
+	// Redact is a flag to redact findings. This is exported
+	// so users using gitleaks as a library can set this flag
+	// without calling `detector.Start(cmd *cobra.Command)`
+	Redact bool
+
+	// Verbose is a flag to print findings
 	Verbose bool
 	Verbose bool
-	Redact  bool
+
+	// commitMap is used to keep track of commits that have been scanned.
+	// This is only used for logging purposes and git scans.
+	commitMap map[string]bool
+
+	// findingMutex is to prevent concurrent access to the
+	// findings slice when adding findings.
+	findingMutex *sync.Mutex
+
+	// findings is a slice of report.Findings. This is the result
+	// of the detector's scan which can then be used to generate a
+	// report.
+	findings []report.Finding
 }
 }
 
 
-const MAXGOROUTINES = 4
+// Fragment contains the data to be scanned
+type Fragment struct {
+	// Raw is the raw content of the fragment
+	Raw string
+
+	// FilePath is the path to the file if applicable
+	FilePath string
+
+	// CommitSHA is the SHA of the commit if applicable
+	CommitSHA string
 
 
-func DetectFindings(cfg config.Config, b []byte, filePath string, commit string) []report.Finding {
+	// newlineIndices is a list of indices of newlines in the raw content.
+	// This is used to calculate the line location of a finding
+	newlineIndices [][]int
+}
+
+// NewDetector creates a new detector with the given config
+func NewDetector(cfg config.Config) *Detector {
+	return &Detector{
+		commitMap:    make(map[string]bool),
+		findingMutex: &sync.Mutex{},
+		findings:     make([]report.Finding, 0),
+		Config:       cfg,
+	}
+}
+
+// NewDetectorDefaultConfig creates a new detector with the default config
+func NewDetectorDefaultConfig() (*Detector, error) {
+	viper.SetConfigType("toml")
+	err := viper.ReadConfig(strings.NewReader(config.DefaultConfig))
+	if err != nil {
+		return nil, err
+	}
+	var vc config.ViperConfig
+	err = viper.Unmarshal(&vc)
+	if err != nil {
+		return nil, err
+	}
+	cfg, err := vc.Translate()
+	if err != nil {
+		return nil, err
+	}
+	return NewDetector(cfg), nil
+}
+
+// DetectBytes scans the given bytes and returns a list of findings
+func (d *Detector) DetectBytes(content []byte) []report.Finding {
+	return d.DetectString(string(content))
+}
+
+// DetectString scans the given string and returns a list of findings
+func (d *Detector) DetectString(content string) []report.Finding {
+	return d.Detect(Fragment{
+		Raw: content,
+	})
+}
+
+// detectRule scans the given fragment for the given rule and returns a list of findings
+func (d *Detector) detectRule(fragment Fragment, rule *config.Rule) []report.Finding {
 	var findings []report.Finding
 	var findings []report.Finding
-	linePairs := regexp.MustCompile("\n").FindAllIndex(b, -1)
 
 
-	// check if we should skip file based on the global allowlist or if the file is the same as the gitleaks config
-	if cfg.Allowlist.PathAllowed(filePath) || filePath == cfg.Path {
+	// check if filepath or commit is allowed for this rule
+	if rule.Allowlist.CommitAllowed(fragment.CommitSHA) ||
+		rule.Allowlist.PathAllowed(fragment.FilePath) {
 		return findings
 		return findings
 	}
 	}
 
 
-	for _, r := range cfg.Rules {
-		pathSkip := false
-		if r.Allowlist.CommitAllowed(commit) {
-			continue
+	if rule.Path != nil && rule.Regex == nil {
+		// Path _only_ rule
+		if rule.Path.Match([]byte(fragment.FilePath)) {
+			finding := report.Finding{
+				Description: rule.Description,
+				File:        fragment.FilePath,
+				RuleID:      rule.RuleID,
+				Match:       fmt.Sprintf("file detected: %s", fragment.FilePath),
+				Tags:        rule.Tags,
+			}
+			return append(findings, finding)
+		}
+	} else if rule.Path != nil {
+		// if path is set _and_ a regex is set, then we need to check both
+		// so if the path does not match, then we should return early and not
+		// consider the regex
+		if !rule.Path.Match([]byte(fragment.FilePath)) {
+			return findings
 		}
 		}
-		if r.Allowlist.PathAllowed(filePath) {
+	}
+
+	matchIndices := rule.Regex.FindAllStringIndex(fragment.Raw, -1)
+	for _, matchIndex := range matchIndices {
+		// extract secret from match
+		secret := strings.Trim(fragment.Raw[matchIndex[0]:matchIndex[1]], "\n")
+
+		// determine location of match. Note that the location
+		// in the finding will be the line/column numbers of the _match_
+		// not the _secret_, which will be different if the secretGroup
+		// value is set for this rule
+		loc := location(fragment, matchIndex)
+
+		finding := report.Finding{
+			Description: rule.Description,
+			File:        fragment.FilePath,
+			RuleID:      rule.RuleID,
+			StartLine:   loc.startLine,
+			EndLine:     loc.endLine,
+			StartColumn: loc.startColumn,
+			EndColumn:   loc.endColumn,
+			Secret:      secret,
+			Match:       secret,
+			Tags:        rule.Tags,
+		}
+
+		// check if the secret is in the allowlist
+		if rule.Allowlist.RegexAllowed(finding.Secret) ||
+			d.Config.Allowlist.RegexAllowed(finding.Secret) {
 			continue
 			continue
 		}
 		}
 
 
-		// Check if path should be considered
-		if r.Path != nil {
-			if r.Path.Match([]byte(filePath)) {
-				if r.Regex == nil {
-					// This is a path only rule
-					f := report.Finding{
-						Description: r.Description,
-						File:        filePath,
-						RuleID:      r.RuleID,
-						Match:       fmt.Sprintf("file detected: %s", filePath),
-						Tags:        r.Tags,
-					}
-					findings = append(findings, f)
-					pathSkip = true
+		// extract secret from secret group if set
+		if rule.SecretGroup != 0 {
+			groups := rule.Regex.FindStringSubmatch(secret)
+			if len(groups) <= rule.SecretGroup || len(groups) == 0 {
+				// Config validation should prevent this
+				continue
+			}
+			secret = groups[rule.SecretGroup]
+			finding.Secret = secret
+		}
+
+		// check entropy
+		entropy := shannonEntropy(finding.Secret)
+		finding.Entropy = float32(entropy)
+		if rule.Entropy != 0.0 {
+			if entropy <= rule.Entropy {
+				// entropy is too low, skip this finding
+				continue
+			}
+			// NOTE: this is a goofy hack to get around the fact that golang's regex engine
+			// does not support positive lookaheads. Ideally we would want to add a
+			// restriction on generic rules regex that requires the secret match group
+			// contains both numbers and alphabetical characters, not just alphabetical characters.
+			// What this bit of code does is check if the ruleid is prepended with "generic" and enforces the
+			// secret contains both digits and alphabetical characters.
+			// TODO: this should be replaced with stop words
+			if strings.HasPrefix(rule.RuleID, "generic") {
+				if !containsDigit(secret) {
+					continue
 				}
 				}
-			} else {
-				pathSkip = true
 			}
 			}
 		}
 		}
-		if pathSkip {
-			continue
+
+		findings = append(findings, finding)
+	}
+	return findings
+}
+
+// DetectGit accepts a source directory, git log options and a GitScanType. Depending on the
+// scan type it obtains a *gitdiff.File channel from git.GitLog or git.GitDiff, then looks at each
+// file (patch) on that channel and determines if the patch contains any findings.
+func (d *Detector) DetectGit(source string, logOpts string, gitScanType GitScanType) ([]report.Finding, error) {
+	var (
+		gitdiffFiles <-chan *gitdiff.File
+		err          error
+	)
+	switch gitScanType {
+	case DetectType:
+		gitdiffFiles, err = git.GitLog(source, logOpts)
+		if err != nil {
+			return d.findings, err
+		}
+	case ProtectType:
+		gitdiffFiles, err = git.GitDiff(source, false)
+		if err != nil {
+			return d.findings, err
 		}
 		}
+	case ProtectStagedType:
+		gitdiffFiles, err = git.GitDiff(source, true)
+		if err != nil {
+			return d.findings, err
+		}
+	}
 
 
-		matchIndices := r.Regex.FindAllIndex(b, -1)
-		for _, m := range matchIndices {
-			location := getLocation(linePairs, m[0], m[1])
-			secret := strings.Trim(string(b[m[0]:m[1]]), "\n")
-			f := report.Finding{
-				Description: r.Description,
-				File:        filePath,
-				RuleID:      r.RuleID,
-				StartLine:   location.startLine,
-				EndLine:     location.endLine,
-				StartColumn: location.startColumn,
-				EndColumn:   location.endColumn,
-				Secret:      secret,
-				Match:       secret,
-				Tags:        r.Tags,
-			}
+	s := semgroup.NewGroup(context.Background(), 4)
 
 
-			if r.Allowlist.RegexAllowed(f.Secret) || cfg.Allowlist.RegexAllowed(f.Secret) {
+	for gitdiffFile := range gitdiffFiles {
+		gitdiffFile := gitdiffFile
+
+		// skip binary files and deleted files
+		if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
+			continue
+		}
+
+		// Check if commit is allowed
+		commitSHA := ""
+		if gitdiffFile.PatchHeader != nil {
+			commitSHA = gitdiffFile.PatchHeader.SHA
+			if d.Config.Allowlist.CommitAllowed(gitdiffFile.PatchHeader.SHA) {
 				continue
 				continue
 			}
 			}
+		}
+		d.addCommit(commitSHA)
+
+		s.Go(func() error {
+			for _, textFragment := range gitdiffFile.TextFragments {
+				if textFragment == nil {
+					return nil
+				}
 
 
-			// extract secret from secret group if set
-			if r.SecretGroup != 0 {
-				groups := r.Regex.FindStringSubmatch(secret)
-				if len(groups) <= r.SecretGroup || len(groups) == 0 {
-					// Config validation should prevent this
-					break
+				fragment := Fragment{
+					Raw:       textFragment.Raw(gitdiff.OpAdd),
+					CommitSHA: commitSHA,
+					FilePath:  gitdiffFile.NewName,
+				}
+
+				for _, finding := range d.Detect(fragment) {
+					d.addFinding(augmentGitFinding(finding, textFragment, gitdiffFile))
 				}
 				}
-				secret = groups[r.SecretGroup]
-				f.Secret = secret
 			}
 			}
+			return nil
+		})
+	}
+
+	if err := s.Wait(); err != nil {
+		return d.findings, err
+	}
+	log.Debug().Msgf("%d commits scanned. Note: this number might be smaller than expected due to commits with no additions", len(d.commitMap))
+	return d.findings, nil
+}
 
 
-			// extract secret from secret group if set
-			if r.EntropySet() {
-				include, entropy := r.IncludeEntropy(secret)
-				if include {
-					f.Entropy = float32(entropy)
-					findings = append(findings, f)
+// DetectFiles accepts a path to a source directory or file and begins a scan of the
+// file or directory.
+func (d *Detector) DetectFiles(source string) ([]report.Finding, error) {
+	s := semgroup.NewGroup(context.Background(), 4)
+	paths := make(chan string)
+	s.Go(func() error {
+		defer close(paths)
+		return filepath.Walk(source,
+			func(path string, fInfo os.FileInfo, err error) error {
+				if err != nil {
+					return err
+				}
+				if fInfo.Name() == ".git" {
+					return filepath.SkipDir
+				}
+				if fInfo.Mode().IsRegular() {
+					paths <- path
 				}
 				}
-			} else {
-				findings = append(findings, f)
+				return nil
+			})
+	})
+	for pa := range paths {
+		p := pa
+		s.Go(func() error {
+			b, err := os.ReadFile(p)
+			if err != nil {
+				return err
 			}
 			}
-		}
+			fragment := Fragment{
+				Raw:      string(b),
+				FilePath: p,
+			}
+			for _, finding := range d.Detect(fragment) {
+				// need to add 1 since line counting starts at 1
+				finding.EndLine++
+				finding.StartLine++
+				d.addFinding(finding)
+			}
+
+			return nil
+		})
+	}
+
+	if err := s.Wait(); err != nil {
+		return d.findings, err
 	}
 	}
 
 
-	return dedupe(findings)
+	return d.findings, nil
 }
 }
 
 
-func printFinding(f report.Finding) {
-	var b []byte
-	b, _ = json.MarshalIndent(f, "", "	")
-	fmt.Println(string(b))
+// Detect scans the given fragment and returns a list of findings
+func (d *Detector) Detect(fragment Fragment) []report.Finding {
+	var findings []report.Finding
+
+	// check if filepath is allowed
+	if d.Config.Allowlist.PathAllowed(fragment.FilePath) ||
+		fragment.FilePath == d.Config.Path {
+		return findings
+	}
+
+	// add newline indices for location calculation in detectRule
+	fragment.newlineIndices = regexp.MustCompile("\n").FindAllStringIndex(fragment.Raw, -1)
+
+	for _, rule := range d.Config.Rules {
+		findings = append(findings, d.detectRule(fragment, rule)...)
+	}
+	return filter(findings, d.Redact)
 }
 }
 
 
-func dedupe(findings []report.Finding) []report.Finding {
-	var retFindings []report.Finding
-	for _, f := range findings {
-		include := true
-		if strings.Contains(strings.ToLower(f.RuleID), "generic") {
-			for _, fPrime := range findings {
-				if f.StartLine == fPrime.StartLine &&
-					f.EndLine == fPrime.EndLine &&
-					f.Commit == fPrime.Commit &&
-					f.RuleID != fPrime.RuleID &&
-					strings.Contains(fPrime.Secret, f.Secret) &&
-					!strings.Contains(strings.ToLower(fPrime.RuleID), "generic") {
-
-					genericMatch := strings.Replace(f.Match, f.Secret, "REDACTED", -1)
-					betterMatch := strings.Replace(fPrime.Match, fPrime.Secret, "REDACTED", -1)
-					log.Debug().Msgf("skipping %s finding (%s), %s rule takes precendence (%s)", f.RuleID, genericMatch, fPrime.RuleID, betterMatch)
-					include = false
-					break
-				}
-			}
-		}
-		if include {
-			retFindings = append(retFindings, f)
-		}
+// addFinding synchronously adds a finding to the findings slice
+func (d *Detector) addFinding(finding report.Finding) {
+	d.findingMutex.Lock()
+	d.findings = append(d.findings, finding)
+	if d.Verbose {
+		printFinding(finding)
 	}
 	}
+	d.findingMutex.Unlock()
+}
 
 
-	return retFindings
+// addCommit records a commit SHA in the commit map. It takes no lock, so it must not be called concurrently.
+func (d *Detector) addCommit(commit string) {
+	d.commitMap[commit] = true
 }
 }

+ 297 - 44
detect/detect_test.go

@@ -2,6 +2,7 @@ package detect
 
 
 import (
 import (
 	"fmt"
 	"fmt"
+	"os"
 	"path/filepath"
 	"path/filepath"
 	"testing"
 	"testing"
 
 
@@ -12,20 +13,22 @@ import (
 	"github.com/zricethezav/gitleaks/v8/report"
 	"github.com/zricethezav/gitleaks/v8/report"
 )
 )
 
 
-func TestDetectFindings(t *testing.T) {
+const configPath = "../testdata/config/"
+const repoBasePath = "../testdata/repos/"
+
+func TestDetect(t *testing.T) {
 	tests := []struct {
 	tests := []struct {
 		cfgName          string
 		cfgName          string
-		opts             Options
-		filePath         string
-		bytes            []byte
-		commit           string
+		fragment         Fragment
 		expectedFindings []report.Finding
 		expectedFindings []report.Finding
 		wantError        error
 		wantError        error
 	}{
 	}{
 		{
 		{
-			cfgName:  "escaped_character_group",
-			bytes:    []byte(`pypi-AgEIcHlwaS5vcmcAAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAAB`),
-			filePath: "tmp.go",
+			cfgName: "escaped_character_group",
+			fragment: Fragment{
+				Raw:      `pypi-AgEIcHlwaS5vcmcAAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAAB`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{
 			expectedFindings: []report.Finding{
 				{
 				{
 					Description: "PyPI upload token",
 					Description: "PyPI upload token",
@@ -38,13 +41,16 @@ func TestDetectFindings(t *testing.T) {
 					EndLine:     1,
 					EndLine:     1,
 					StartColumn: 1,
 					StartColumn: 1,
 					EndColumn:   86,
 					EndColumn:   86,
+					Entropy:     1.9606875,
 				},
 				},
 			},
 			},
 		},
 		},
 		{
 		{
-			cfgName:  "simple",
-			bytes:    []byte(`awsToken := \"AKIALALEMEL33243OLIA\"`),
-			filePath: "tmp.go",
+			cfgName: "simple",
+			fragment: Fragment{
+				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{
 			expectedFindings: []report.Finding{
 				{
 				{
 					Description: "AWS Access Key",
 					Description: "AWS Access Key",
@@ -57,32 +63,41 @@ func TestDetectFindings(t *testing.T) {
 					EndLine:     1,
 					EndLine:     1,
 					StartColumn: 15,
 					StartColumn: 15,
 					EndColumn:   34,
 					EndColumn:   34,
+					Entropy:     3.0841837,
 				},
 				},
 			},
 			},
 		},
 		},
 		{
 		{
-			cfgName:          "allow_aws_re",
-			bytes:            []byte(`awsToken := \"AKIALALEMEL33243OLIA\"`),
-			filePath:         "tmp.go",
+			cfgName: "allow_aws_re",
+			fragment: Fragment{
+				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{},
 			expectedFindings: []report.Finding{},
 		},
 		},
 		{
 		{
-			cfgName:          "allow_path",
-			bytes:            []byte(`awsToken := \"AKIALALEMEL33243OLIA\"`),
-			filePath:         "tmp.go",
+			cfgName: "allow_path",
+			fragment: Fragment{
+				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{},
 			expectedFindings: []report.Finding{},
 		},
 		},
 		{
 		{
-			cfgName:          "allow_commit",
-			bytes:            []byte(`awsToken := \"AKIALALEMEL33243OLIA\"`),
-			filePath:         "tmp.go",
+			cfgName: "allow_commit",
+			fragment: Fragment{
+				Raw:       `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath:  "tmp.go",
+				CommitSHA: "allowthiscommit",
+			},
 			expectedFindings: []report.Finding{},
 			expectedFindings: []report.Finding{},
-			commit:           "allowthiscommit",
 		},
 		},
 		{
 		{
-			cfgName:  "entropy_group",
-			bytes:    []byte(`const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`),
-			filePath: "tmp.go",
+			cfgName: "entropy_group",
+			fragment: Fragment{
+				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{
 			expectedFindings: []report.Finding{
 				{
 				{
 					Description: "Discord API key",
 					Description: "Discord API key",
@@ -100,15 +115,19 @@ func TestDetectFindings(t *testing.T) {
 			},
 			},
 		},
 		},
 		{
 		{
-			cfgName:          "generic_with_py_path",
-			bytes:            []byte(`const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`),
-			filePath:         "tmp.go",
+			cfgName: "generic_with_py_path",
+			fragment: Fragment{
+				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{},
 			expectedFindings: []report.Finding{},
 		},
 		},
 		{
 		{
-			cfgName:  "generic_with_py_path",
-			bytes:    []byte(`const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`),
-			filePath: "tmp.py",
+			cfgName: "generic_with_py_path",
+			fragment: Fragment{
+				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+				FilePath: "tmp.py",
+			},
 			expectedFindings: []report.Finding{
 			expectedFindings: []report.Finding{
 				{
 				{
 					Description: "Generic API Key",
 					Description: "Generic API Key",
@@ -126,9 +145,11 @@ func TestDetectFindings(t *testing.T) {
 			},
 			},
 		},
 		},
 		{
 		{
-			cfgName:  "path_only",
-			bytes:    []byte(`const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`),
-			filePath: "tmp.py",
+			cfgName: "path_only",
+			fragment: Fragment{
+				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+				FilePath: "tmp.py",
+			},
 			expectedFindings: []report.Finding{
 			expectedFindings: []report.Finding{
 				{
 				{
 					Description: "Python Files",
 					Description: "Python Files",
@@ -140,22 +161,28 @@ func TestDetectFindings(t *testing.T) {
 			},
 			},
 		},
 		},
 		{
 		{
-			cfgName:          "bad_entropy_group",
-			bytes:            []byte(`const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`),
-			filePath:         "tmp.go",
+			cfgName: "bad_entropy_group",
+			fragment: Fragment{
+				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{},
 			expectedFindings: []report.Finding{},
 			wantError:        fmt.Errorf("Discord API key invalid regex secret group 5, max regex secret group 3"),
 			wantError:        fmt.Errorf("Discord API key invalid regex secret group 5, max regex secret group 3"),
 		},
 		},
 		{
 		{
-			cfgName:          "simple",
-			bytes:            []byte(`awsToken := \"AKIALALEMEL33243OLIA\"`),
-			filePath:         filepath.Join(configPath, "simple.toml"),
+			cfgName: "simple",
+			fragment: Fragment{
+				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath: filepath.Join(configPath, "simple.toml"),
+			},
 			expectedFindings: []report.Finding{},
 			expectedFindings: []report.Finding{},
 		},
 		},
 		{
 		{
-			cfgName:          "allow_global_aws_re",
-			bytes:            []byte(`awsToken := \"AKIALALEMEL33243OLIA\"`),
-			filePath:         "tmp.go",
+			cfgName: "allow_global_aws_re",
+			fragment: Fragment{
+				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath: "tmp.go",
+			},
 			expectedFindings: []report.Finding{},
 			expectedFindings: []report.Finding{},
 		},
 		},
 	}
 	}
@@ -171,7 +198,10 @@ func TestDetectFindings(t *testing.T) {
 		}
 		}
 
 
 		var vc config.ViperConfig
 		var vc config.ViperConfig
-		viper.Unmarshal(&vc)
+		err = viper.Unmarshal(&vc)
+		if err != nil {
+			t.Error(err)
+		}
 		cfg, err := vc.Translate()
 		cfg, err := vc.Translate()
 		cfg.Path = filepath.Join(configPath, tt.cfgName+".toml")
 		cfg.Path = filepath.Join(configPath, tt.cfgName+".toml")
 		if tt.wantError != nil {
 		if tt.wantError != nil {
@@ -180,8 +210,231 @@ func TestDetectFindings(t *testing.T) {
 			}
 			}
 			assert.Equal(t, tt.wantError, err)
 			assert.Equal(t, tt.wantError, err)
 		}
 		}
+		d := NewDetector(cfg)
 
 
-		findings := DetectFindings(cfg, tt.bytes, tt.filePath, tt.commit)
+		findings := d.Detect(tt.fragment)
 		assert.ElementsMatch(t, tt.expectedFindings, findings)
 		assert.ElementsMatch(t, tt.expectedFindings, findings)
 	}
 	}
 }
 }
+
+// TestFromGit tests the Detector.DetectGit method
+func TestFromGit(t *testing.T) {
+	tests := []struct {
+		cfgName          string
+		source           string
+		logOpts          string
+		expectedFindings []report.Finding
+	}{
+		{
+			source:  filepath.Join(repoBasePath, "small"),
+			cfgName: "simple",
+			expectedFindings: []report.Finding{
+				{
+					Description: "AWS Access Key",
+					StartLine:   20,
+					EndLine:     20,
+					StartColumn: 19,
+					EndColumn:   38,
+					Secret:      "AKIALALEMEL33243OLIA",
+					Match:       "AKIALALEMEL33243OLIA",
+					File:        "main.go",
+					Date:        "2021-11-02T23:37:53Z",
+					Commit:      "1b6da43b82b22e4eaa10bcf8ee591e91abbfc587",
+					Author:      "Zachary Rice",
+					Email:       "zricer@protonmail.com",
+					Message:     "Accidentally add a secret",
+					RuleID:      "aws-access-key",
+					Tags:        []string{"key", "AWS"},
+					Entropy:     3.0841837,
+				},
+				{
+					Description: "AWS Access Key",
+					StartLine:   9,
+					EndLine:     9,
+					StartColumn: 17,
+					EndColumn:   36,
+					Secret:      "AKIALALEMEL33243OLIA",
+					Match:       "AKIALALEMEL33243OLIA",
+					File:        "foo/foo.go",
+					Date:        "2021-11-02T23:48:06Z",
+					Commit:      "491504d5a31946ce75e22554cc34203d8e5ff3ca",
+					Author:      "Zach Rice",
+					Email:       "zricer@protonmail.com",
+					Message:     "adding foo package with secret",
+					RuleID:      "aws-access-key",
+					Tags:        []string{"key", "AWS"},
+					Entropy:     3.0841837,
+				},
+			},
+		},
+		{
+			source:  filepath.Join(repoBasePath, "small"),
+			logOpts: "--all foo...",
+			cfgName: "simple",
+			expectedFindings: []report.Finding{
+				{
+					Description: "AWS Access Key",
+					StartLine:   9,
+					EndLine:     9,
+					StartColumn: 17,
+					EndColumn:   36,
+					Secret:      "AKIALALEMEL33243OLIA",
+					Match:       "AKIALALEMEL33243OLIA",
+					Date:        "2021-11-02T23:48:06Z",
+					File:        "foo/foo.go",
+					Commit:      "491504d5a31946ce75e22554cc34203d8e5ff3ca",
+					Author:      "Zach Rice",
+					Email:       "zricer@protonmail.com",
+					Message:     "adding foo package with secret",
+					RuleID:      "aws-access-key",
+					Tags:        []string{"key", "AWS"},
+					Entropy:     3.0841837,
+				},
+			},
+		},
+	}
+
+	err := moveDotGit("dotGit", ".git")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer func() {
+		if err := moveDotGit(".git", "dotGit"); err != nil {
+			t.Error(err)
+		}
+	}()
+
+	for _, tt := range tests {
+
+		viper.AddConfigPath(configPath)
+		viper.SetConfigName("simple")
+		viper.SetConfigType("toml")
+		err = viper.ReadInConfig()
+		if err != nil {
+			t.Error(err)
+		}
+
+		var vc config.ViperConfig
+		err = viper.Unmarshal(&vc)
+		if err != nil {
+			t.Error(err)
+		}
+		cfg, err := vc.Translate()
+		if err != nil {
+			t.Error(err)
+		}
+		detector := NewDetector(cfg)
+		findings, err := detector.DetectGit(tt.source, tt.logOpts, DetectType)
+		if err != nil {
+			t.Error(err)
+		}
+
+		for _, f := range findings {
+			f.Match = "" // remove lines cause copying and pasting them has some wack formatting
+		}
+		assert.ElementsMatch(t, tt.expectedFindings, findings)
+	}
+}
+
+// TestFromFiles tests the Detector.DetectFiles method
+func TestFromFiles(t *testing.T) {
+	tests := []struct {
+		cfgName          string
+		source           string
+		expectedFindings []report.Finding
+	}{
+		{
+			source:  filepath.Join(repoBasePath, "nogit"),
+			cfgName: "simple",
+			expectedFindings: []report.Finding{
+				{
+					Description: "AWS Access Key",
+					StartLine:   20,
+					EndLine:     20,
+					StartColumn: 16,
+					EndColumn:   35,
+					Match:       "AKIALALEMEL33243OLIA",
+					Secret:      "AKIALALEMEL33243OLIA",
+					File:        "../testdata/repos/nogit/main.go",
+					RuleID:      "aws-access-key",
+					Tags:        []string{"key", "AWS"},
+					Entropy:     3.0841837,
+				},
+			},
+		},
+		{
+			source:  filepath.Join(repoBasePath, "nogit", "main.go"),
+			cfgName: "simple",
+			expectedFindings: []report.Finding{
+				{
+					Description: "AWS Access Key",
+					StartLine:   20,
+					EndLine:     20,
+					StartColumn: 16,
+					EndColumn:   35,
+					Match:       "AKIALALEMEL33243OLIA",
+					Secret:      "AKIALALEMEL33243OLIA",
+					File:        "../testdata/repos/nogit/main.go",
+					RuleID:      "aws-access-key",
+					Tags:        []string{"key", "AWS"},
+					Entropy:     3.0841837,
+				},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		viper.AddConfigPath(configPath)
+		viper.SetConfigName("simple")
+		viper.SetConfigType("toml")
+		err := viper.ReadInConfig()
+		if err != nil {
+			t.Error(err)
+		}
+
+		var vc config.ViperConfig
+		err = viper.Unmarshal(&vc)
+		if err != nil {
+			t.Error(err)
+		}
+		cfg, _ := vc.Translate()
+		detector := NewDetector(cfg)
+		findings, err := detector.DetectFiles(tt.source)
+		if err != nil {
+			t.Error(err)
+		}
+
+		assert.ElementsMatch(t, tt.expectedFindings, findings)
+	}
+}
+
+func moveDotGit(from, to string) error {
+	repoDirs, err := os.ReadDir("../testdata/repos")
+	if err != nil {
+		return err
+	}
+	for _, dir := range repoDirs {
+		if to == ".git" {
+			_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), "dotGit"))
+			if os.IsNotExist(err) {
+				// dont want to delete the only copy of .git accidentally
+				continue
+			}
+			os.RemoveAll(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), ".git"))
+		}
+		if !dir.IsDir() {
+			continue
+		}
+		_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from))
+		if os.IsNotExist(err) {
+			continue
+		}
+
+		err = os.Rename(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from),
+			fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), to))
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}

+ 0 - 77
detect/files.go

@@ -1,77 +0,0 @@
-package detect
-
-import (
-	"context"
-	"os"
-	"path/filepath"
-	"sync"
-
-	"golang.org/x/sync/errgroup"
-
-	"github.com/zricethezav/gitleaks/v8/config"
-	"github.com/zricethezav/gitleaks/v8/report"
-)
-
-// FromFiles opens the directory or file specified in source and checks each file against the rules
-// from the configuration. If any secrets are found, they are added to the list of findings.
-func FromFiles(source string, cfg config.Config, outputOptions Options) ([]report.Finding, error) {
-	var (
-		findings []report.Finding
-		mu       sync.Mutex
-	)
-	concurrentGoroutines := make(chan struct{}, MAXGOROUTINES)
-	g, _ := errgroup.WithContext(context.Background())
-	paths := make(chan string)
-	g.Go(func() error {
-		defer close(paths)
-		return filepath.Walk(source,
-			func(path string, fInfo os.FileInfo, err error) error {
-				if err != nil {
-					return err
-				}
-				if fInfo.Name() == ".git" {
-					return filepath.SkipDir
-				}
-				if fInfo.Mode().IsRegular() {
-					paths <- path
-				}
-				return nil
-			})
-	})
-	for pa := range paths {
-		p := pa
-		concurrentGoroutines <- struct{}{}
-		g.Go(func() error {
-			defer func() {
-				<-concurrentGoroutines
-			}()
-			b, err := os.ReadFile(p)
-			if err != nil {
-				return err
-			}
-			fis := DetectFindings(cfg, b, p, "")
-			for _, fi := range fis {
-				// need to add 1 since line counting starts at 1
-				fi.StartLine++
-				fi.EndLine++
-
-				if outputOptions.Redact {
-					fi.Redact()
-				}
-				if outputOptions.Verbose {
-					printFinding(fi)
-				}
-				mu.Lock()
-				findings = append(findings, fi)
-				mu.Unlock()
-			}
-			return nil
-		})
-	}
-
-	if err := g.Wait(); err != nil {
-		return findings, err
-	}
-
-	return findings, nil
-}

+ 0 - 80
detect/files_test.go

@@ -1,80 +0,0 @@
-package detect
-
-import (
-	"path/filepath"
-	"testing"
-
-	"github.com/spf13/viper"
-	"github.com/stretchr/testify/assert"
-
-	"github.com/zricethezav/gitleaks/v8/config"
-	"github.com/zricethezav/gitleaks/v8/report"
-)
-
-// TestFromGit tests the FromGit function
-func TestFromFiles(t *testing.T) {
-	tests := []struct {
-		cfgName          string
-		opts             Options
-		source           string
-		expectedFindings []report.Finding
-	}{
-		{
-			source:  filepath.Join(repoBasePath, "nogit"),
-			cfgName: "simple",
-			expectedFindings: []report.Finding{
-				{
-					Description: "AWS Access Key",
-					StartLine:   20,
-					EndLine:     20,
-					StartColumn: 16,
-					EndColumn:   35,
-					Match:       "AKIALALEMEL33243OLIA",
-					Secret:      "AKIALALEMEL33243OLIA",
-					File:        "../testdata/repos/nogit/main.go",
-					RuleID:      "aws-access-key",
-					Tags:        []string{"key", "AWS"},
-				},
-			},
-		},
-		{
-			source:  filepath.Join(repoBasePath, "nogit", "main.go"),
-			cfgName: "simple",
-			expectedFindings: []report.Finding{
-				{
-					Description: "AWS Access Key",
-					StartLine:   20,
-					EndLine:     20,
-					StartColumn: 16,
-					EndColumn:   35,
-					Match:       "AKIALALEMEL33243OLIA",
-					Secret:      "AKIALALEMEL33243OLIA",
-					File:        "../testdata/repos/nogit/main.go",
-					RuleID:      "aws-access-key",
-					Tags:        []string{"key", "AWS"},
-				},
-			},
-		},
-	}
-
-	for _, tt := range tests {
-		viper.AddConfigPath(configPath)
-		viper.SetConfigName("simple")
-		viper.SetConfigType("toml")
-		err := viper.ReadInConfig()
-		if err != nil {
-			t.Error(err)
-		}
-
-		var vc config.ViperConfig
-		viper.Unmarshal(&vc)
-		cfg, _ := vc.Translate()
-
-		findings, err := FromFiles(tt.source, cfg, tt.opts)
-		if err != nil {
-			t.Error(err)
-		}
-
-		assert.ElementsMatch(t, tt.expectedFindings, findings)
-	}
-}

+ 0 - 95
detect/git.go

@@ -1,95 +0,0 @@
-package detect
-
-import (
-	"strings"
-	"sync"
-	"time"
-
-	"github.com/gitleaks/go-gitdiff/gitdiff"
-	"github.com/rs/zerolog/log"
-	"github.com/zricethezav/gitleaks/v8/config"
-	"github.com/zricethezav/gitleaks/v8/report"
-)
-
-// FromGit accepts a gitdiff.File channel (structure output from `git log -p`) and a configuration
-// struct. Files from the gitdiff.File channel are then checked against each rule in the configuration to
-// check for secrets. If any secrets are found, they are added to the list of findings.
-func FromGit(files <-chan *gitdiff.File, cfg config.Config, outputOptions Options) []report.Finding {
-	var findings []report.Finding
-	mu := sync.Mutex{}
-	wg := sync.WaitGroup{}
-	concurrentGoroutines := make(chan struct{}, MAXGOROUTINES)
-	commitMap := make(map[string]bool)
-	for f := range files {
-		// keep track of commits for logging
-		if f.PatchHeader != nil {
-			commitMap[f.PatchHeader.SHA] = true
-		}
-		wg.Add(1)
-		concurrentGoroutines <- struct{}{}
-		go func(f *gitdiff.File) {
-			defer func() {
-				wg.Done()
-				<-concurrentGoroutines
-			}()
-			if f.IsBinary {
-				return
-			}
-
-			if f.IsDelete {
-				return
-			}
-
-			commitSHA := ""
-
-			// Check if commit is allowed
-			if f.PatchHeader != nil {
-				commitSHA = f.PatchHeader.SHA
-				if cfg.Allowlist.CommitAllowed(f.PatchHeader.SHA) {
-					return
-				}
-			}
-
-			for _, tf := range f.TextFragments {
-				if f.TextFragments == nil {
-					// TODO fix this in gitleaks gitdiff fork
-					// https://github.com/gitleaks/gitleaks/issues/11
-					continue
-				}
-
-				for _, fi := range DetectFindings(cfg, []byte(tf.Raw(gitdiff.OpAdd)), f.NewName, commitSHA) {
-					// don't add to start/end lines if finding is from a file only rule
-					if !strings.HasPrefix(fi.Match, "file detected") {
-						fi.StartLine += int(tf.NewPosition)
-						fi.EndLine += int(tf.NewPosition)
-					}
-					if f.PatchHeader != nil {
-						fi.Commit = f.PatchHeader.SHA
-						fi.Message = f.PatchHeader.Message()
-						if f.PatchHeader.Author != nil {
-							fi.Author = f.PatchHeader.Author.Name
-							fi.Email = f.PatchHeader.Author.Email
-						}
-						fi.Date = f.PatchHeader.AuthorDate.UTC().Format(time.RFC3339)
-					}
-
-					if outputOptions.Redact {
-						fi.Redact()
-					}
-
-					if outputOptions.Verbose {
-						printFinding(fi)
-					}
-					mu.Lock()
-					findings = append(findings, fi)
-					mu.Unlock()
-
-				}
-			}
-		}(f)
-	}
-
-	wg.Wait()
-	log.Debug().Msgf("%d commits scanned. Note: this number might be smaller than expected due to commits with no additions", len(commitMap))
-	return findings
-}

+ 0 - 0
git/git.go → detect/git/git.go


+ 158 - 0
detect/git/git_test.go

@@ -0,0 +1,158 @@
+package git_test
+
+// TODO: commenting out this test for now because it's flaky. Alternatives to consider to get this working:
+// -- use `git stash` instead of `restore()`
+
+// const repoBasePath = "../../testdata/repos/"
+
+// const expectPath = "../../testdata/expected/"
+
+// func TestGitLog(t *testing.T) {
+// 	tests := []struct {
+// 		source   string
+// 		logOpts  string
+// 		expected string
+// 	}{
+// 		{
+// 			source:   filepath.Join(repoBasePath, "small"),
+// 			expected: filepath.Join(expectPath, "git", "small.txt"),
+// 		},
+// 		{
+// 			source:   filepath.Join(repoBasePath, "small"),
+// 			expected: filepath.Join(expectPath, "git", "small-branch-foo.txt"),
+// 			logOpts:  "--all foo...",
+// 		},
+// 	}
+
+// 	err := moveDotGit("dotGit", ".git")
+// 	if err != nil {
+// 		t.Fatal(err)
+// 	}
+// 	defer func() {
+// 		if err = moveDotGit(".git", "dotGit"); err != nil {
+// 			t.Fatal(err)
+// 		}
+// 	}()
+
+// 	for _, tt := range tests {
+// 		files, err := git.GitLog(tt.source, tt.logOpts)
+// 		if err != nil {
+// 			t.Error(err)
+// 		}
+
+// 		var diffSb strings.Builder
+// 		for f := range files {
+// 			for _, tf := range f.TextFragments {
+// 				diffSb.WriteString(tf.Raw(gitdiff.OpAdd))
+// 			}
+// 		}
+
+// 		expectedBytes, err := os.ReadFile(tt.expected)
+// 		if err != nil {
+// 			t.Error(err)
+// 		}
+// 		expected := string(expectedBytes)
+// 		if expected != diffSb.String() {
+// 			// write string builder to .got file using os.Create
+// 			err = os.WriteFile(strings.Replace(tt.expected, ".txt", ".got.txt", 1), []byte(diffSb.String()), 0644)
+// 			if err != nil {
+// 				t.Error(err)
+// 			}
+// 			t.Error("expected: ", expected, "got: ", diffSb.String())
+// 		}
+// 	}
+// }
+
+// func TestGitDiff(t *testing.T) {
+// 	tests := []struct {
+// 		source    string
+// 		expected  string
+// 		additions string
+// 		target    string
+// 	}{
+// 		{
+// 			source:    filepath.Join(repoBasePath, "small"),
+// 			expected:  "this line is added\nand another one",
+// 			additions: "this line is added\nand another one",
+// 			target:    filepath.Join(repoBasePath, "small", "main.go"),
+// 		},
+// 	}
+
+// 	err := moveDotGit("dotGit", ".git")
+// 	if err != nil {
+// 		t.Fatal(err)
+// 	}
+// 	defer func() {
+// 		if err = moveDotGit(".git", "dotGit"); err != nil {
+// 			t.Fatal(err)
+// 		}
+// 	}()
+
+// 	for _, tt := range tests {
+// 		noChanges, err := os.ReadFile(tt.target)
+// 		if err != nil {
+// 			t.Error(err)
+// 		}
+// 		err = os.WriteFile(tt.target, []byte(tt.additions), 0644)
+// 		if err != nil {
+// 			restore(tt.target, noChanges, t)
+// 			t.Error(err)
+// 		}
+
+// 		files, err := git.GitDiff(tt.source, false)
+// 		if err != nil {
+// 			restore(tt.target, noChanges, t)
+// 			t.Error(err)
+// 		}
+
+// 		for f := range files {
+// 			sb := strings.Builder{}
+// 			for _, tf := range f.TextFragments {
+// 				sb.WriteString(tf.Raw(gitdiff.OpAdd))
+// 			}
+// 			if sb.String() != tt.expected {
+// 				restore(tt.target, noChanges, t)
+// 				t.Error("expected: ", tt.expected, "got: ", sb.String())
+// 			}
+// 		}
+// 		restore(tt.target, noChanges, t)
+// 	}
+// }
+
+// func restore(path string, data []byte, t *testing.T) {
+// 	err := os.WriteFile(path, data, 0644)
+// 	if err != nil {
+// 		t.Fatal(err)
+// 	}
+// }
+
+// func moveDotGit(from, to string) error {
+// 	repoDirs, err := os.ReadDir("../../testdata/repos")
+// 	if err != nil {
+// 		return err
+// 	}
+// 	for _, dir := range repoDirs {
+// 		if to == ".git" {
+// 			_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), "dotGit"))
+// 			if os.IsNotExist(err) {
+// 				// dont want to delete the only copy of .git accidentally
+// 				continue
+// 			}
+// 			os.RemoveAll(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), ".git"))
+// 		}
+// 		if !dir.IsDir() {
+// 			continue
+// 		}
+// 		_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from))
+// 		if os.IsNotExist(err) {
+// 			continue
+// 		}
+
+// 		err = os.Rename(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from),
+// 			fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), to))
+// 		if err != nil {
+// 			return err
+// 		}
+// 	}
+// 	return nil
+// }

+ 0 - 160
detect/git_test.go

@@ -1,160 +0,0 @@
-package detect
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-	"testing"
-
-	"github.com/spf13/viper"
-	"github.com/stretchr/testify/assert"
-
-	"github.com/zricethezav/gitleaks/v8/config"
-	"github.com/zricethezav/gitleaks/v8/git"
-	"github.com/zricethezav/gitleaks/v8/report"
-)
-
-const repoBasePath = "../testdata/repos/"
-const expectPath = "../testdata/expected/"
-const configPath = "../testdata/config/"
-
-// TestFromGit tests the FromGit function
-func TestFromGit(t *testing.T) {
-	tests := []struct {
-		cfgName          string
-		opts             Options
-		source           string
-		logOpts          string
-		expected         string
-		expectedFindings []report.Finding
-	}{
-		{
-			source:   filepath.Join(repoBasePath, "small"),
-			expected: filepath.Join(expectPath, "git", "small.txt"),
-			cfgName:  "simple",
-			expectedFindings: []report.Finding{
-				{
-					Description: "AWS Access Key",
-					StartLine:   20,
-					EndLine:     20,
-					StartColumn: 19,
-					EndColumn:   38,
-					Secret:      "AKIALALEMEL33243OLIA",
-					Match:       "AKIALALEMEL33243OLIA",
-					File:        "main.go",
-					Date:        "2021-11-02T23:37:53Z",
-					Commit:      "1b6da43b82b22e4eaa10bcf8ee591e91abbfc587",
-					Author:      "Zachary Rice",
-					Email:       "zricer@protonmail.com",
-					Message:     "Accidentally add a secret",
-					RuleID:      "aws-access-key",
-					Tags:        []string{"key", "AWS"},
-				},
-				{
-					Description: "AWS Access Key",
-					StartLine:   9,
-					EndLine:     9,
-					StartColumn: 17,
-					EndColumn:   36,
-					Secret:      "AKIALALEMEL33243OLIA",
-					Match:       "AKIALALEMEL33243OLIA",
-					File:        "foo/foo.go",
-					Date:        "2021-11-02T23:48:06Z",
-					Commit:      "491504d5a31946ce75e22554cc34203d8e5ff3ca",
-					Author:      "Zach Rice",
-					Email:       "zricer@protonmail.com",
-					Message:     "adding foo package with secret",
-					RuleID:      "aws-access-key",
-					Tags:        []string{"key", "AWS"},
-				},
-			},
-		},
-		{
-			source:   filepath.Join(repoBasePath, "small"),
-			expected: filepath.Join(expectPath, "git", "small-branch-foo.txt"),
-			logOpts:  "--all foo...",
-			cfgName:  "simple",
-			expectedFindings: []report.Finding{
-				{
-					Description: "AWS Access Key",
-					StartLine:   9,
-					EndLine:     9,
-					StartColumn: 17,
-					EndColumn:   36,
-					Secret:      "AKIALALEMEL33243OLIA",
-					Match:       "AKIALALEMEL33243OLIA",
-					Date:        "2021-11-02T23:48:06Z",
-					File:        "foo/foo.go",
-					Commit:      "491504d5a31946ce75e22554cc34203d8e5ff3ca",
-					Author:      "Zach Rice",
-					Email:       "zricer@protonmail.com",
-					Message:     "adding foo package with secret",
-					RuleID:      "aws-access-key",
-					Tags:        []string{"key", "AWS"},
-				},
-			},
-		},
-	}
-
-	err := moveDotGit("dotGit", ".git")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer moveDotGit(".git", "dotGit")
-
-	for _, tt := range tests {
-		files, err := git.GitLog(tt.source, tt.logOpts)
-		if err != nil {
-			t.Error(err)
-		}
-
-		viper.AddConfigPath(configPath)
-		viper.SetConfigName("simple")
-		viper.SetConfigType("toml")
-		err = viper.ReadInConfig()
-		if err != nil {
-			t.Error(err)
-		}
-
-		var vc config.ViperConfig
-		viper.Unmarshal(&vc)
-		cfg, _ := vc.Translate()
-
-		findings := FromGit(files, cfg, tt.opts)
-		for _, f := range findings {
-			f.Match = "" // remove lines cause copying and pasting them has some wack formatting
-		}
-		assert.ElementsMatch(t, tt.expectedFindings, findings)
-	}
-}
-
-func moveDotGit(from, to string) error {
-	repoDirs, err := os.ReadDir("../testdata/repos")
-	if err != nil {
-		return err
-	}
-	for _, dir := range repoDirs {
-		if to == ".git" {
-			_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), "dotGit"))
-			if os.IsNotExist(err) {
-				// dont want to delete the only copy of .git accidentally
-				continue
-			}
-			os.RemoveAll(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), ".git"))
-		}
-		if !dir.IsDir() {
-			continue
-		}
-		_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from))
-		if os.IsNotExist(err) {
-			continue
-		}
-
-		err = os.Rename(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from),
-			fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), to))
-		if err != nil {
-			return err
-		}
-	}
-	return nil
-}

+ 5 - 2
detect/location.go

@@ -10,7 +10,7 @@ type Location struct {
 	endLineIndex   int
 	endLineIndex   int
 }
 }
 
 
-func getLocation(linePairs [][]int, start int, end int) Location {
+func location(fragment Fragment, matchIndex []int) Location {
 	var (
 	var (
 		prevNewLine int
 		prevNewLine int
 		location    Location
 		location    Location
@@ -18,7 +18,10 @@ func getLocation(linePairs [][]int, start int, end int) Location {
 		_lineNum    int
 		_lineNum    int
 	)
 	)
 
 
-	for lineNum, pair := range linePairs {
+	start := matchIndex[0]
+	end := matchIndex[1]
+
+	for lineNum, pair := range fragment.newlineIndices {
 		_lineNum = lineNum
 		_lineNum = lineNum
 		newLineByteIndex := pair[0]
 		newLineByteIndex := pair[0]
 		if prevNewLine <= start && start < newLineByteIndex {
 		if prevNewLine <= start && start < newLineByteIndex {

+ 1 - 1
detect/location_test.go

@@ -49,7 +49,7 @@ func TestGetLocation(t *testing.T) {
 	}
 	}
 
 
 	for _, test := range tests {
 	for _, test := range tests {
-		loc := getLocation(test.linePairs, test.start, test.end)
+		loc := location(Fragment{newlineIndices: test.linePairs}, []int{test.start, test.end})
 		if loc != test.wantLocation {
 		if loc != test.wantLocation {
 			t.Errorf("\nstartLine %d\nstartColumn: %d\nendLine: %d\nendColumn: %d\nstartLineIndex: %d\nendlineIndex %d",
 			t.Errorf("\nstartLine %d\nstartColumn: %d\nendLine: %d\nendColumn: %d\nstartLineIndex: %d\nendlineIndex %d",
 				loc.startLine, loc.startColumn, loc.endLine, loc.endColumn, loc.startLineIndex, loc.endLineIndex)
 				loc.startLine, loc.startColumn, loc.endLine, loc.endColumn, loc.startLineIndex, loc.endLineIndex)

+ 107 - 0
detect/utils.go

@@ -0,0 +1,107 @@
+package detect
+
+import (
+	"encoding/json"
+	"fmt"
+	"math"
+	"strings"
+	"time"
+
+	"github.com/zricethezav/gitleaks/v8/report"
+
+	"github.com/gitleaks/go-gitdiff/gitdiff"
+	"github.com/rs/zerolog/log"
+)
+
+// augmentGitFinding updates the start and end line numbers of a finding to include the
+// delta from the git diff
+func augmentGitFinding(finding report.Finding, textFragment *gitdiff.TextFragment, f *gitdiff.File) report.Finding {
+	if !strings.HasPrefix(finding.Match, "file detected") {
+		finding.StartLine += int(textFragment.NewPosition)
+		finding.EndLine += int(textFragment.NewPosition)
+	}
+
+	if f.PatchHeader != nil {
+		finding.Commit = f.PatchHeader.SHA
+		finding.Message = f.PatchHeader.Message()
+		if f.PatchHeader.Author != nil {
+			finding.Author = f.PatchHeader.Author.Name
+			finding.Email = f.PatchHeader.Author.Email
+		}
+		finding.Date = f.PatchHeader.AuthorDate.UTC().Format(time.RFC3339)
+	}
+	return finding
+}
+
+// shannonEntropy calculates the entropy of data using the formula defined here:
+// https://en.wiktionary.org/wiki/Shannon_entropy
+// Another way to think about what this is doing is calculating the number of bits
+// needed to on average encode the data. So, the higher the entropy, the more random the data, the
+// more bits needed to encode that data.
+func shannonEntropy(data string) (entropy float64) {
+	if data == "" {
+		return 0
+	}
+
+	charCounts := make(map[rune]int)
+	for _, char := range data {
+		charCounts[char]++
+	}
+
+	invLength := 1.0 / float64(len(data))
+	for _, count := range charCounts {
+		freq := float64(count) * invLength
+		entropy -= freq * math.Log2(freq)
+	}
+
+	return entropy
+}
+
+// filter will dedupe and redact findings
+func filter(findings []report.Finding, redact bool) []report.Finding {
+	var retFindings []report.Finding
+	for _, f := range findings {
+		include := true
+		if strings.Contains(strings.ToLower(f.RuleID), "generic") {
+			for _, fPrime := range findings {
+				if f.StartLine == fPrime.StartLine &&
+					f.EndLine == fPrime.EndLine &&
+					f.Commit == fPrime.Commit &&
+					f.RuleID != fPrime.RuleID &&
+					strings.Contains(fPrime.Secret, f.Secret) &&
+					!strings.Contains(strings.ToLower(fPrime.RuleID), "generic") {
+
+					genericMatch := strings.Replace(f.Match, f.Secret, "REDACTED", -1)
+					betterMatch := strings.Replace(fPrime.Match, fPrime.Secret, "REDACTED", -1)
+					log.Debug().Msgf("skipping %s finding (%s), %s rule takes precendence (%s)", f.RuleID, genericMatch, fPrime.RuleID, betterMatch)
+					include = false
+					break
+				}
+			}
+		}
+		if redact {
+			f.Redact()
+		}
+		if include {
+			retFindings = append(retFindings, f)
+		}
+	}
+	return retFindings
+}
+
+func printFinding(f report.Finding) {
+	var b []byte
+	b, _ = json.MarshalIndent(f, "", "	")
+	fmt.Println(string(b))
+}
+
+func containsDigit(s string) bool {
+	for _, c := range s {
+		switch c {
+		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return true
+		}
+
+	}
+	return false
+}

+ 0 - 157
git/git_test.go

@@ -1,157 +0,0 @@
-package git_test
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-
-	"github.com/gitleaks/go-gitdiff/gitdiff"
-	"github.com/zricethezav/gitleaks/v8/git"
-)
-
-const repoBasePath = "../testdata/repos/"
-const expectPath = "../testdata/expected/"
-
-func TestGitLog(t *testing.T) {
-	tests := []struct {
-		source   string
-		logOpts  string
-		expected string
-	}{
-		{
-			source:   filepath.Join(repoBasePath, "small"),
-			expected: filepath.Join(expectPath, "git", "small.txt"),
-		},
-		{
-			source:   filepath.Join(repoBasePath, "small"),
-			expected: filepath.Join(expectPath, "git", "small-branch-foo.txt"),
-			logOpts:  "--all foo...",
-		},
-	}
-
-	err := moveDotGit("dotGit", ".git")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer moveDotGit(".git", "dotGit")
-
-	for _, tt := range tests {
-		files, err := git.GitLog(tt.source, tt.logOpts)
-		if err != nil {
-			t.Error(err)
-		}
-
-		var diffSb strings.Builder
-		for f := range files {
-			for _, tf := range f.TextFragments {
-				diffSb.WriteString(tf.Raw(gitdiff.OpAdd))
-			}
-		}
-
-		expectedBytes, err := os.ReadFile(tt.expected)
-		if err != nil {
-			t.Error(err)
-		}
-		expected := string(expectedBytes)
-		if expected != diffSb.String() {
-			// write string builder to .got file using os.Create
-			err = os.WriteFile(strings.Replace(tt.expected, ".txt", ".got.txt", 1), []byte(diffSb.String()), 0644)
-			if err != nil {
-				t.Error(err)
-			}
-			t.Error("expected: ", expected, "got: ", diffSb.String())
-		}
-	}
-}
-
-func TestGitDiff(t *testing.T) {
-	tests := []struct {
-		source    string
-		expected  string
-		additions string
-		target    string
-	}{
-		{
-			source:    filepath.Join(repoBasePath, "small"),
-			expected:  "this line is added\nand another one",
-			additions: "this line is added\nand another one",
-			target:    filepath.Join(repoBasePath, "small", "main.go"),
-		},
-	}
-
-	err := moveDotGit("dotGit", ".git")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer moveDotGit(".git", "dotGit")
-
-	for _, tt := range tests {
-		noChanges, err := os.ReadFile(tt.target)
-		if err != nil {
-			t.Error(err)
-		}
-		err = os.WriteFile(tt.target, []byte(tt.additions), 0644)
-		if err != nil {
-			restore(tt.target, noChanges, t)
-			t.Error(err)
-		}
-
-		files, err := git.GitDiff(tt.source, false)
-		if err != nil {
-			restore(tt.target, noChanges, t)
-			t.Error(err)
-		}
-
-		for f := range files {
-			sb := strings.Builder{}
-			for _, tf := range f.TextFragments {
-				sb.WriteString(tf.Raw(gitdiff.OpAdd))
-			}
-			if sb.String() != tt.expected {
-				restore(tt.target, noChanges, t)
-				t.Error("expected: ", tt.expected, "got: ", sb.String())
-			}
-		}
-		restore(tt.target, noChanges, t)
-	}
-}
-
-func restore(path string, data []byte, t *testing.T) {
-	err := os.WriteFile(path, data, 0644)
-	if err != nil {
-		t.Fatal(err)
-	}
-}
-
-func moveDotGit(from, to string) error {
-	repoDirs, err := os.ReadDir("../testdata/repos")
-	if err != nil {
-		return err
-	}
-	for _, dir := range repoDirs {
-		if to == ".git" {
-			_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), "dotGit"))
-			if os.IsNotExist(err) {
-				// dont want to delete the only copy of .git accidentally
-				continue
-			}
-			os.RemoveAll(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), ".git"))
-		}
-		if !dir.IsDir() {
-			continue
-		}
-		_, err := os.Stat(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from))
-		if os.IsNotExist(err) {
-			continue
-		}
-
-		err = os.Rename(fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), from),
-			fmt.Sprintf("%s/%s/%s", repoBasePath, dir.Name(), to))
-		if err != nil {
-			return err
-		}
-	}
-	return nil
-}

+ 4 - 4
go.mod

@@ -3,12 +3,12 @@ module github.com/zricethezav/gitleaks/v8
 go 1.17
 go 1.17
 
 
 require (
 require (
+	github.com/fatih/semgroup v1.2.0
 	github.com/gitleaks/go-gitdiff v0.7.4
 	github.com/gitleaks/go-gitdiff v0.7.4
-	github.com/rs/zerolog v1.25.0
+	github.com/rs/zerolog v1.26.1
 	github.com/spf13/cobra v1.2.1
 	github.com/spf13/cobra v1.2.1
 	github.com/spf13/viper v1.8.1
 	github.com/spf13/viper v1.8.1
 	github.com/stretchr/testify v1.7.0
 	github.com/stretchr/testify v1.7.0
-	golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
 )
 )
 
 
 require (
 require (
@@ -25,9 +25,9 @@ require (
 	github.com/spf13/jwalterweatherman v1.1.0 // indirect
 	github.com/spf13/jwalterweatherman v1.1.0 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
 	github.com/subosito/gotenv v1.2.0 // indirect
 	github.com/subosito/gotenv v1.2.0 // indirect
+	golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect
 	golang.org/x/sys v0.0.0-20211110154304-99a53858aa08 // indirect
 	golang.org/x/sys v0.0.0-20211110154304-99a53858aa08 // indirect
-	golang.org/x/text v0.3.5 // indirect
-	gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
+	golang.org/x/text v0.3.6 // indirect
 	gopkg.in/ini.v1 v1.62.0 // indirect
 	gopkg.in/ini.v1 v1.62.0 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 	gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
 	gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect

+ 13 - 6
go.sum

@@ -67,6 +67,8 @@ github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.m
 github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
 github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
 github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
 github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
 github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
 github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
+github.com/fatih/semgroup v1.2.0 h1:h/OLXwEM+3NNyAdZEpMiH1OzfplU09i2qXPVThGZvyg=
+github.com/fatih/semgroup v1.2.0/go.mod h1:1KAD4iIYfXjE4U13B48VM4z9QUwV5Tt8O4rS879kgm8=
 github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
 github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
 github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
 github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
@@ -209,8 +211,8 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:
 github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
 github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
 github.com/rs/xid v1.3.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
 github.com/rs/xid v1.3.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
-github.com/rs/zerolog v1.25.0 h1:Rj7XygbUHKUlDPcVdoLyR91fJBsduXj5fRxyqIQj/II=
-github.com/rs/zerolog v1.25.0/go.mod h1:7KHcEGe0QZPOm2IE4Kpb5rTh6n1h2hIgS5OOnu1rUaI=
+github.com/rs/zerolog v1.26.1 h1:/ihwxqH+4z8UxyI70wM1z9yCvkWcfz/a3mj48k/Zngc=
+github.com/rs/zerolog v1.26.1/go.mod h1:/wSSJWX7lVrsOwlbyTRSOJvqRlc+WjWlfes+CiJ+tmc=
 github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
 github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
 github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
 github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
@@ -246,6 +248,7 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
 github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
 github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
+github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
 go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs=
 go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs=
 go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g=
 go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g=
 go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsXlzd7alYQ=
 go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsXlzd7alYQ=
@@ -266,6 +269,7 @@ golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8U
 golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20211215165025-cf75a172585e/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
 golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -337,6 +341,7 @@ golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc=
 golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc=
 golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
 golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
+golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -401,7 +406,9 @@ golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7w
 golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20211110154304-99a53858aa08 h1:WecRHqgE09JBkh/584XIE6PMz5KKE/vER4izNUi30AQ=
 golang.org/x/sys v0.0.0-20211110154304-99a53858aa08 h1:WecRHqgE09JBkh/584XIE6PMz5KKE/vER4izNUi30AQ=
 golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
@@ -411,8 +418,9 @@ golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.5 h1:i6eZZ+zk0SOf0xgBpEpPD18qWcJda6q1sxt3S0kzyUQ=
 golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@@ -467,7 +475,7 @@ golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4f
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
 golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
 golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
 golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
-golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
+golang.org/x/tools v0.1.7/go.mod h1:LGqMHiF4EqQNHR1JncWGqT5BVaXmza+X+BDGol+dOxo=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -575,9 +583,8 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
-gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
 gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
 gopkg.in/ini.v1 v1.62.0 h1:duBzk771uxoUuOlyRLkHsygud9+5lrlGjdFBb4mSKDU=
 gopkg.in/ini.v1 v1.62.0 h1:duBzk771uxoUuOlyRLkHsygud9+5lrlGjdFBb4mSKDU=
 gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
 gopkg.in/ini.v1 v1.62.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=

+ 0 - 8
report/finding.go

@@ -1,7 +1,6 @@
 package report
 package report
 
 
 import (
 import (
-	"strconv"
 	"strings"
 	"strings"
 )
 )
 
 
@@ -43,10 +42,3 @@ func (f *Finding) Redact() {
 	f.Match = strings.Replace(f.Match, f.Secret, "REDACTED", -1)
 	f.Match = strings.Replace(f.Match, f.Secret, "REDACTED", -1)
 	f.Secret = "REDACT"
 	f.Secret = "REDACT"
 }
 }
-
-func (f *Finding) Hash() string {
-	return f.Secret + f.Commit +
-		strconv.Itoa(f.EndLine) +
-		strconv.Itoa(f.StartLine)
-
-}

+ 4 - 5
report/report.go

@@ -21,13 +21,12 @@ func Write(findings []Finding, cfg config.Config, ext string, reportPath string)
 	ext = strings.ToLower(ext)
 	ext = strings.ToLower(ext)
 	switch ext {
 	switch ext {
 	case ".json", "json":
 	case ".json", "json":
-		writeJson(findings, file)
+		err = writeJson(findings, file)
 	case ".csv", "csv":
 	case ".csv", "csv":
-		writeCsv(findings, file)
+		err = writeCsv(findings, file)
 	case ".sarif", "sarif":
 	case ".sarif", "sarif":
-		writeSarif(cfg, findings, file)
-
+		err = writeSarif(cfg, findings, file)
 	}
 	}
 
 
-	return nil
+	return err
 }
 }

+ 5 - 1
report/sarif_test.go

@@ -63,7 +63,11 @@ func TestWriteSarif(t *testing.T) {
 		}
 		}
 
 
 		var vc config.ViperConfig
 		var vc config.ViperConfig
-		viper.Unmarshal(&vc)
+		err = viper.Unmarshal(&vc)
+		if err != nil {
+			t.Error(err)
+		}
+
 		cfg, err := vc.Translate()
 		cfg, err := vc.Translate()
 		if err != nil {
 		if err != nil {
 			t.Error(err)
 			t.Error(err)