
Merge branch 'master' of github.com:gitleaks/gitleaks into archive-handling

Zach, 9 months ago
commit 56e409d9f1
85 changed files with 2516 additions and 1152 deletions
  1. .github/workflows/test.yml (+1 -1)
  2. README.md (+21 -9)
  3. cmd/detect.go (+2 -1)
  4. cmd/diagnostics.go (+209 -0)
  5. cmd/directory.go (+4 -1)
  6. cmd/generate/config/base/config.go (+91 -89)
  7. cmd/generate/config/base/config_test.go (+32 -16)
  8. cmd/generate/config/main.go (+5 -1)
  9. cmd/generate/config/rules/1password.go (+40 -1)
  10. cmd/generate/config/rules/clickhouse.go (+30 -0)
  11. cmd/generate/config/rules/config.tmpl (+13 -11)
  12. cmd/generate/config/rules/gcp.go (+2 -4)
  13. cmd/generate/config/rules/kubernetes.go (+3 -3)
  14. cmd/generate/config/rules/perplexity.go (+27 -0)
  15. cmd/git.go (+2 -0)
  16. cmd/protect.go (+2 -0)
  17. cmd/root.go (+47 -5)
  18. cmd/stdin.go (+2 -0)
  19. config/allowlist.go (+33 -34)
  20. config/allowlist_test.go (+1 -1)
  21. config/config.go (+153 -116)
  22. config/config_test.go (+368 -166)
  23. config/gitleaks.toml (+41 -18)
  24. detect/codec/ascii.go (+34 -0)
  25. detect/codec/base64.go (+39 -0)
  26. detect/codec/decoder.go (+105 -0)
  27. detect/codec/decoder_test.go (+44 -7)
  28. detect/codec/encodings.go (+153 -0)
  29. detect/codec/hex.go (+60 -0)
  30. detect/codec/percent.go (+34 -0)
  31. detect/codec/segment.go (+173 -0)
  32. detect/codec/start_end.go (+57 -0)
  33. detect/decoder.go (+0 -306)
  34. detect/detect.go (+183 -148)
  35. detect/detect_test.go (+381 -158)
  36. detect/directory.go (+4 -6)
  37. detect/git.go (+16 -4)
  38. sources/directory.go (+12 -5)
  39. testdata/config/encoded.toml (+1 -1)
  40. testdata/config/generic.toml (+2 -2)
  41. testdata/config/generic_with_py_path.toml (+8 -9)
  42. testdata/config/invalid/allowlist_global_empty.toml (+2 -0)
  43. testdata/config/invalid/allowlist_global_old_and_new.toml (+4 -0)
  44. testdata/config/invalid/allowlist_global_regextarget.toml (+3 -0)
  45. testdata/config/invalid/allowlist_global_target_rule_id.toml (+7 -0)
  46. testdata/config/invalid/allowlist_rule_empty.toml (+0 -2)
  47. testdata/config/invalid/allowlist_rule_old_and_new.toml (+0 -2)
  48. testdata/config/invalid/allowlist_rule_regextarget.toml (+0 -2)
  49. testdata/config/invalid/extend_invalid_base.toml (+5 -3)
  50. testdata/config/invalid/rule_bad_entropy_group.toml (+0 -0)
  51. testdata/config/invalid/rule_missing_id.toml (+0 -0)
  52. testdata/config/invalid/rule_no_regex_or_path.toml (+0 -0)
  53. testdata/config/simple.toml (+6 -0)
  54. testdata/config/valid/allowlist_global_multiple.toml (+10 -0)
  55. testdata/config/valid/allowlist_global_old_compat.toml (+2 -0)
  56. testdata/config/valid/allowlist_global_regex.toml (+2 -0)
  57. testdata/config/valid/allowlist_global_target_rules.toml (+20 -0)
  58. testdata/config/valid/allowlist_rule_commit.toml (+0 -0)
  59. testdata/config/valid/allowlist_rule_extend_default.toml (+11 -0)
  60. testdata/config/valid/allowlist_rule_old_compat.toml (+0 -2)
  61. testdata/config/valid/allowlist_rule_path.toml (+0 -0)
  62. testdata/config/valid/allowlist_rule_regex.toml (+0 -0)
  63. testdata/config/valid/extend.toml (+1 -3)
  64. testdata/config/valid/extend_base_1.toml (+1 -1)
  65. testdata/config/valid/extend_base_2.toml (+0 -0)
  66. testdata/config/valid/extend_base_3.toml (+0 -0)
  67. testdata/config/valid/extend_base_rule_including_keywords_with_attribute.toml (+1 -1)
  68. testdata/config/valid/extend_disabled.toml (+1 -1)
  69. testdata/config/valid/extend_disabled_base.toml (+0 -0)
  70. testdata/config/valid/extend_rule_allowlist_and.toml (+1 -1)
  71. testdata/config/valid/extend_rule_allowlist_base.toml (+0 -0)
  72. testdata/config/valid/extend_rule_allowlist_or.toml (+1 -1)
  73. testdata/config/valid/extend_rule_keywords_base.toml (+0 -0)
  74. testdata/config/valid/extend_rule_new.toml (+1 -1)
  75. testdata/config/valid/extend_rule_no_regexpath.toml (+1 -1)
  76. testdata/config/valid/extend_rule_override_description.toml (+0 -0)
  77. testdata/config/valid/extend_rule_override_entropy.toml (+0 -0)
  78. testdata/config/valid/extend_rule_override_keywords.toml (+0 -0)
  79. testdata/config/valid/extend_rule_override_path.toml (+0 -0)
  80. testdata/config/valid/extend_rule_override_regex.toml (+0 -0)
  81. testdata/config/valid/extend_rule_override_secret_group.toml (+0 -0)
  82. testdata/config/valid/extend_rule_override_tags.toml (+0 -0)
  83. testdata/config/valid/rule_entropy_group.toml (+0 -2)
  84. testdata/config/valid/rule_path_only.toml (+0 -2)
  85. testdata/config/valid/rule_regex_escaped_character_group.toml (+1 -4)

+ 1 - 1
.github/workflows/test.yml

@@ -23,7 +23,7 @@ jobs:
          go-version: 1.23

      - name: Build
-        run: go build -v ./...
+        run: go build ./...

      - name: Set up gotestsum
        run: |

+ 21 - 9
README.md

@@ -144,7 +144,6 @@ Usage:
  gitleaks [command]

Available Commands:
-  completion  generate the autocompletion script for the specified shell
  dir         scan directories or files for secrets
  git         scan git repositories for secrets
  help        Help about any command
@@ -160,6 +159,8 @@ Flags:
                                      3. env var GITLEAKS_CONFIG_TOML with the file content
                                      4. (target path)/.gitleaks.toml
                                      If none of the four options are used, then gitleaks will use the default config
+      --diagnostics string            enable diagnostics (comma-separated list: cpu,mem,trace). cpu=CPU profiling, mem=memory profiling, trace=execution tracing
+      --diagnostics-dir string        directory to store diagnostics output files (defaults to current directory)
      --enable-rule strings           only enable specific rules by id
      --exit-code int                 exit code when leaks have been encountered (default 1)
  -i, --gitleaks-ignore-path string   path to .gitleaksignore file or folder containing one (default ".")
@@ -171,7 +172,7 @@ Flags:
      --no-banner                     suppress banner
      --no-color                      turn off color for verbose output
      --redact uint[=100]             redact secrets from logs and stdout. To redact only parts of the secret just apply a percent value from 0..100. For example --redact=20 (default 100%)
-  -f, --report-format string          output format (json, csv, junit, sarif) (default "json")
+  -f, --report-format string          output format (json, csv, junit, sarif, template)
  -r, --report-path string            report file
      --report-template string        template file used to generate the report (implies --report-format=template)
  -v, --verbose                       show verbose output from scan
@@ -290,11 +291,10 @@ disabledRules = [ "generic-api-key"]
# An array of tables that contain information that define instructions
# on how to detect secrets
[[rules]]
-
# Unique identifier for this rule
id = "awesome-rule-1"

-# Short human readable description of the rule.
+# Short human-readable description of the rule.
description = "awesome rule 1"

# Golang regular expression used to detect secrets. Note Golang's regex engine
@@ -368,19 +368,20 @@ id = "gitlab-pat"
    regexTarget = "line"
    regexes = [ '''MY-glpat-''' ]

-# This is a global allowlist which has a higher order of precedence than rule-specific allowlists.
+
+# ⚠️ In v8.25.0 `[allowlist]` was replaced with `[[allowlists]]`.
+#
+# Global allowlists have a higher order of precedence than rule-specific allowlists.
# If a commit listed in the `commits` field below is encountered then that commit will be skipped and no
# secrets will be detected for said commit. The same logic applies for regexes and paths.
-[allowlist]
+[[allowlists]]
description = "global allow list"
commits = [ "commit-A", "commit-B", "commit-C"]
paths = [
  '''gitleaks\.toml''',
  '''(.*?)(jpg|gif|doc)'''
]
-
# note: (global) regexTarget defaults to check the _Secret_ in the finding.
-# if regexTarget is not specified then _Secret_ will be used.
# Acceptable values for regexTarget are "match" and "line"
regexTarget = "match"
regexes = [
@@ -394,6 +395,15 @@ stopwords = [
  '''client''',
  '''endpoint''',
]
+
+# ⚠️ In v8.25.0, `[[allowlists]]` have a new field called `targetRules`.
+#
+# Common allowlists can be defined once and assigned to multiple rules using `targetRules`.
+# This will only run on the specified rules, not globally.
+[[allowlists]]
+targetRules = ["awesome-rule-1", "awesome-rule-2"]
+description = "Our test assets trigger false-positives in a couple rules."
+paths = ['''tests/expected/.*\.json$''']
```

Refer to the default [gitleaks config](https://github.com/gitleaks/gitleaks/blob/master/config/gitleaks.toml) for examples or follow the [contributing guidelines](https://github.com/gitleaks/gitleaks/blob/master/CONTRIBUTING.md) if you would like to contribute to the default configuration. Additionally, you can check out [this gitleaks blog post](https://blog.gitleaks.io/stop-leaking-secrets-configuration-2-3-aeed293b1fbf) which covers advanced configuration setups.
@@ -440,7 +450,9 @@ ways:

Currently supported encodings:

-- `base64` (both standard and base64url)
+- **percent** - Any printable ASCII percent-encoded values
+- **hex** - Any printable ASCII hex-encoded values >= 32 characters
+- **base64** - Any printable ASCII base64-encoded values >= 16 characters

#### Reporting

+ 2 - 1
cmd/detect.go

@@ -55,6 +55,7 @@ func runDetect(cmd *cobra.Command, args []string) {

	// setup config (aka, the thing that defines rules)
	initConfig(source)
+	initDiagnostics()
	cfg := Config(cmd)

	// create detector
@@ -78,7 +79,7 @@ func runDetect(cmd *cobra.Command, args []string) {
			source,
			detector.Sema,
			detector.FollowSymlinks,
-			detector.Config.Allowlist.PathAllowed,
+			detector.Config.Allowlists,
		)
		if err != nil {
			logging.Fatal().Err(err).Send()

+ 209 - 0
cmd/diagnostics.go

@@ -0,0 +1,209 @@
+package cmd
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"runtime"
+	"runtime/pprof"
+	"runtime/trace"
+	"strings"
+
+	"github.com/zricethezav/gitleaks/v8/logging"
+)
+
+// DiagnosticsManager manages various types of diagnostics
+type DiagnosticsManager struct {
+	Enabled      bool
+	DiagTypes    []string
+	OutputDir    string
+	cpuProfile   *os.File
+	memProfile   string
+	traceProfile *os.File
+}
+
+// NewDiagnosticsManager creates a new DiagnosticsManager instance
+func NewDiagnosticsManager(diagnosticsFlag string, diagnosticsDir string) (*DiagnosticsManager, error) {
+	if diagnosticsFlag == "" {
+		return &DiagnosticsManager{Enabled: false}, nil
+	}
+
+	dm := &DiagnosticsManager{
+		Enabled:   true,
+		DiagTypes: strings.Split(diagnosticsFlag, ","),
+		OutputDir: diagnosticsDir,
+	}
+
+	// If no output directory is specified, use the current directory
+	if dm.OutputDir == "" {
+		var err error
+		dm.OutputDir, err = os.Getwd()
+		if err != nil {
+			return nil, fmt.Errorf("failed to get current directory: %w", err)
+		}
+		logging.Debug().Msgf("No diagnostics directory specified, using current directory: %s", dm.OutputDir)
+	}
+
+	// Create the output directory if it doesn't exist
+	if err := os.MkdirAll(dm.OutputDir, 0755); err != nil {
+		return nil, fmt.Errorf("failed to create diagnostics directory: %w", err)
+	}
+
+	// Make sure the output directory is absolute
+	if !filepath.IsAbs(dm.OutputDir) {
+		absPath, err := filepath.Abs(dm.OutputDir)
+		if err != nil {
+			return nil, fmt.Errorf("failed to get absolute path for diagnostics directory: %w", err)
+		}
+		dm.OutputDir = absPath
+	}
+
+	logging.Debug().Msgf("Diagnostics enabled: %s", strings.Join(dm.DiagTypes, ","))
+	logging.Debug().Msgf("Diagnostics output directory: %s", dm.OutputDir)
+
+	return dm, nil
+}
+
+// StartDiagnostics starts all enabled diagnostics
+func (dm *DiagnosticsManager) StartDiagnostics() error {
+	if !dm.Enabled {
+		return nil
+	}
+
+	var err error
+
+	for _, diagType := range dm.DiagTypes {
+		diagType = strings.TrimSpace(diagType)
+		switch diagType {
+		case "cpu":
+			if err = dm.StartCPUProfile(); err != nil {
+				return err
+			}
+		case "mem":
+			if err = dm.SetupMemoryProfile(); err != nil {
+				return err
+			}
+		case "trace":
+			if err = dm.StartTraceProfile(); err != nil {
+				return err
+			}
+		default:
+			logging.Warn().Msgf("Unknown diagnostics type: %s", diagType)
+		}
+	}
+
+	return nil
+}
+
+// StopDiagnostics stops all started diagnostics
+func (dm *DiagnosticsManager) StopDiagnostics() {
+	if !dm.Enabled {
+		return
+	}
+
+	logging.Debug().Msg("Stopping diagnostics and writing profiling data...")
+
+	for _, diagType := range dm.DiagTypes {
+		diagType = strings.TrimSpace(diagType)
+		switch diagType {
+		case "cpu":
+			dm.StopCPUProfile()
+		case "mem":
+			dm.WriteMemoryProfile()
+		case "trace":
+			dm.StopTraceProfile()
+		}
+	}
+}
+
+// StartCPUProfile starts CPU profiling
+func (dm *DiagnosticsManager) StartCPUProfile() error {
+	cpuProfilePath := filepath.Join(dm.OutputDir, "cpu.pprof")
+	f, err := os.Create(cpuProfilePath)
+	if err != nil {
+		return fmt.Errorf("could not create CPU profile at %s: %w", cpuProfilePath, err)
+	}
+
+	if err := pprof.StartCPUProfile(f); err != nil {
+		f.Close()
+		return fmt.Errorf("could not start CPU profile: %w", err)
+	}
+
+	dm.cpuProfile = f
+	return nil
+}
+
+// StopCPUProfile stops CPU profiling
+func (dm *DiagnosticsManager) StopCPUProfile() {
+	if dm.cpuProfile != nil {
+		pprof.StopCPUProfile()
+		if err := dm.cpuProfile.Close(); err != nil {
+			logging.Error().Err(err).Msg("Error closing CPU profile file")
+		}
+		logging.Info().Msgf("CPU profile written to: %s", dm.cpuProfile.Name())
+		dm.cpuProfile = nil
+	}
+}
+
+// SetupMemoryProfile sets up memory profiling to be written when StopDiagnostics is called
+func (dm *DiagnosticsManager) SetupMemoryProfile() error {
+	memProfilePath := filepath.Join(dm.OutputDir, "mem.pprof")
+	dm.memProfile = memProfilePath
+	return nil
+}
+
+// WriteMemoryProfile writes the memory profile to disk
+func (dm *DiagnosticsManager) WriteMemoryProfile() {
+	if dm.memProfile == "" {
+		return
+	}
+
+	f, err := os.Create(dm.memProfile)
+	if err != nil {
+		logging.Error().Err(err).Msgf("Could not create memory profile at %s", dm.memProfile)
+		return
+	}
+
+	// Get memory profile
+	runtime.GC() // Run GC before taking the memory profile
+	if err := pprof.WriteHeapProfile(f); err != nil {
+		logging.Error().Err(err).Msg("Could not write memory profile")
+	} else {
+		logging.Info().Msgf("Memory profile written to: %s", dm.memProfile)
+	}
+
+	if err := f.Close(); err != nil {
+		logging.Error().Err(err).Msg("Error closing memory profile file")
+	}
+
+	dm.memProfile = ""
+}
+
+// StartTraceProfile starts execution tracing
+func (dm *DiagnosticsManager) StartTraceProfile() error {
+	traceProfilePath := filepath.Join(dm.OutputDir, "trace.out")
+	f, err := os.Create(traceProfilePath)
+	if err != nil {
+		return fmt.Errorf("could not create trace profile at %s: %w", traceProfilePath, err)
+	}
+
+	if err := trace.Start(f); err != nil {
+		f.Close()
+		return fmt.Errorf("could not start trace profile: %w", err)
+	}
+
+	dm.traceProfile = f
+	return nil
+}
+
+// StopTraceProfile stops execution tracing
+func (dm *DiagnosticsManager) StopTraceProfile() {
+	if dm.traceProfile != nil {
+		trace.Stop()
+		if err := dm.traceProfile.Close(); err != nil {
+			logging.Error().Err(err).Msg("Error closing trace profile file")
+		}
+		logging.Info().Msgf("Trace profile written to: %s", dm.traceProfile.Name())
+		dm.traceProfile = nil
+	}
+}
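
Taken together, the manager is created from the two flag values, started before a scan, and stopped once results are summarized. A minimal sketch of that lifecycle, assuming the exported API shown above; the flag string and directory are illustrative, not taken from the commit:

```go
package main

import "github.com/zricethezav/gitleaks/v8/cmd"

func main() {
	// "cpu,trace" mirrors a --diagnostics flag value; the second argument
	// mirrors --diagnostics-dir ("" falls back to the current directory).
	dm, err := cmd.NewDiagnosticsManager("cpu,trace", "./diag")
	if err != nil {
		panic(err)
	}
	if err := dm.StartDiagnostics(); err != nil {
		panic(err)
	}

	// ... run a scan here ...

	dm.StopDiagnostics() // writes cpu.pprof and trace.out into ./diag
}
```

The resulting cpu.pprof and mem.pprof files can be inspected with `go tool pprof`, and trace.out with `go tool trace`.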

+ 4 - 1
cmd/directory.go

@@ -31,7 +31,10 @@ func runDirectory(cmd *cobra.Command, args []string) {
			source = "."
		}
	}
+
	initConfig(source)
+	initDiagnostics()
+
	var (
		findings []report.Finding
		err      error
@@ -60,7 +63,7 @@ func runDirectory(cmd *cobra.Command, args []string) {
		source,
		detector.Sema,
		detector.FollowSymlinks,
-		detector.Config.Allowlist.PathAllowed,
+		detector.Config.Allowlists,
	)
	if err != nil {
		logging.Fatal().Err(err)

+ 91 - 89
cmd/generate/config/base/config.go

@@ -11,106 +11,108 @@ import (
func CreateGlobalConfig() config.Config {
	return config.Config{
		Title: "gitleaks config",
-		Allowlist: &config.Allowlist{
-			Description: "global allow lists",
-			Regexes: []*regexp.Regexp{
-				// ----------- General placeholders -----------
-				regexp.MustCompile(`(?i)^true|false|null$`),
-				// Awkward workaround to detect repeated characters.
-				func() *regexp.Regexp {
-					var (
-						letters  = "abcdefghijklmnopqrstuvwxyz*."
-						patterns []string
-					)
-					for _, char := range letters {
-						if char == '*' || char == '.' {
-							patterns = append(patterns, fmt.Sprintf("\\%c+", char))
-						} else {
-							patterns = append(patterns, fmt.Sprintf("%c+", char))
+		Allowlists: []*config.Allowlist{
+			{
+				Description: "global allow lists",
+				Regexes: []*regexp.Regexp{
+					// ----------- General placeholders -----------
+					regexp.MustCompile(`(?i)^true|false|null$`),
+					// Awkward workaround to detect repeated characters.
+					func() *regexp.Regexp {
+						var (
+							letters  = "abcdefghijklmnopqrstuvwxyz*."
+							patterns []string
+						)
+						for _, char := range letters {
+							if char == '*' || char == '.' {
+								patterns = append(patterns, fmt.Sprintf("\\%c+", char))
+							} else {
+								patterns = append(patterns, fmt.Sprintf("%c+", char))
+							}
						}
-					}
-					return regexp.MustCompile("^(?i:" + strings.Join(patterns, "|") + ")$")
-				}(),
+						return regexp.MustCompile("^(?i:" + strings.Join(patterns, "|") + ")$")
+					}(),

-				// ----------- Environment Variables -----------
-				regexp.MustCompile(`^\$(?:\d+|{\d+})$`),
-				regexp.MustCompile(`^\$(?:[A-Z_]+|[a-z_]+)$`),
-				regexp.MustCompile(`^\${(?:[A-Z_]+|[a-z_]+)}$`),
+					// ----------- Environment Variables -----------
+					regexp.MustCompile(`^\$(?:\d+|{\d+})$`),
+					regexp.MustCompile(`^\$(?:[A-Z_]+|[a-z_]+)$`),
+					regexp.MustCompile(`^\${(?:[A-Z_]+|[a-z_]+)}$`),

-				// ----------- Interpolated Variables -----------
-				// Ansible (https://docs.ansible.com/ansible/latest/playbook_guide/playbooks_variables.html)
-				regexp.MustCompile(`^\{\{[ \t]*[\w ().|]+[ \t]*}}$`),
-				// GitHub Actions
-				// https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables
-				// https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions
-				regexp.MustCompile(`^\$\{\{[ \t]*(?:(?:env|github|secrets|vars)(?:\.[A-Za-z]\w+)+[\w "'&./=|]*)[ \t]*}}$`),
-				// NuGet (https://learn.microsoft.com/en-us/nuget/reference/nuget-config-file#using-environment-variables)
-				regexp.MustCompile(`^%(?:[A-Z_]+|[a-z_]+)%$`),
-				// String formatting.
-				regexp.MustCompile(`^%[+\-# 0]?[bcdeEfFgGoOpqstTUvxX]$`), // Golang (https://pkg.go.dev/fmt)
-				regexp.MustCompile(`^\{\d{0,2}}$`),                       // Python (https://docs.python.org/3/tutorial/inputoutput.html)
-				// Urban Code Deploy (https://www.ibm.com/support/pages/replace-token-step-replaces-replacement-values-windows-variables)
-				regexp.MustCompile(`^@(?:[A-Z_]+|[a-z_]+)@$`),
+					// ----------- Interpolated Variables -----------
+					// Ansible (https://docs.ansible.com/ansible/latest/playbook_guide/playbooks_variables.html)
+					regexp.MustCompile(`^\{\{[ \t]*[\w ().|]+[ \t]*}}$`),
+					// GitHub Actions
+					// https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables
+					// https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions
+					regexp.MustCompile(`^\$\{\{[ \t]*(?:(?:env|github|secrets|vars)(?:\.[A-Za-z]\w+)+[\w "'&./=|]*)[ \t]*}}$`),
+					// NuGet (https://learn.microsoft.com/en-us/nuget/reference/nuget-config-file#using-environment-variables)
+					regexp.MustCompile(`^%(?:[A-Z_]+|[a-z_]+)%$`),
+					// String formatting.
+					regexp.MustCompile(`^%[+\-# 0]?[bcdeEfFgGoOpqstTUvxX]$`), // Golang (https://pkg.go.dev/fmt)
+					regexp.MustCompile(`^\{\d{0,2}}$`),                       // Python (https://docs.python.org/3/tutorial/inputoutput.html)
+					// Urban Code Deploy (https://www.ibm.com/support/pages/replace-token-step-replaces-replacement-values-windows-variables)
+					regexp.MustCompile(`^@(?:[A-Z_]+|[a-z_]+)@$`),

-				// ----------- Miscellaneous -----------
-				// File paths
-				regexp.MustCompile(`^/Users/(?i)[a-z0-9]+/[\w .-/]+$`),              // MacOS
-				regexp.MustCompile(`^/(?:bin|etc|home|opt|tmp|usr|var)/[\w ./-]+$`), // Linux
-				// 11980 Jps -Dapplication.home=D:\develop_tools\jdk\jdk1.8.0_131 -Xms8m
-				//regexp.MustCompile(`^$`), // Windows
-			},
-			Paths: []*regexp.Regexp{
-				regexp.MustCompile(`gitleaks\.toml`),
+					// ----------- Miscellaneous -----------
+					// File paths
+					regexp.MustCompile(`^/Users/(?i)[a-z0-9]+/[\w .-/]+$`),              // MacOS
+					regexp.MustCompile(`^/(?:bin|etc|home|opt|tmp|usr|var)/[\w ./-]+$`), // Linux
+					// 11980 Jps -Dapplication.home=D:\develop_tools\jdk\jdk1.8.0_131 -Xms8m
+					//regexp.MustCompile(`^$`), // Windows
+				},
+				Paths: []*regexp.Regexp{
+					regexp.MustCompile(`gitleaks\.toml`),

-				// ----------- Documents and media -----------
-				regexp.MustCompile(`(?i)\.(?:bmp|gif|jpe?g|png|svg|tiff?)$`), // Images
-				regexp.MustCompile(`(?i)\.(?:eot|[ot]tf|woff2?)$`),           // Fonts
-				regexp.MustCompile(`(?i)\.(?:docx?|xlsx?|pdf|bin|socket|vsidx|v2|suo|wsuo|.dll|pdb|exe|gltf)$`),
+					// ----------- Documents and media -----------
+					regexp.MustCompile(`(?i)\.(?:bmp|gif|jpe?g|png|svg|tiff?)$`), // Images
+					regexp.MustCompile(`(?i)\.(?:eot|[ot]tf|woff2?)$`),           // Fonts
+					regexp.MustCompile(`(?i)\.(?:docx?|xlsx?|pdf|bin|socket|vsidx|v2|suo|wsuo|.dll|pdb|exe|gltf)$`),

-				// ----------- Golang files -----------
-				regexp.MustCompile(`go\.(?:mod|sum|work(?:\.sum)?)$`),
-				regexp.MustCompile(`(?:^|/)vendor/modules\.txt$`),
-				regexp.MustCompile(`(?:^|/)vendor/(?:github\.com|golang\.org/x|google\.golang\.org|gopkg\.in|istio\.io|k8s\.io|sigs\.k8s\.io)(?:/.*)?$`),
+					// ----------- Golang files -----------
+					regexp.MustCompile(`go\.(?:mod|sum|work(?:\.sum)?)$`),
+					regexp.MustCompile(`(?:^|/)vendor/modules\.txt$`),
+					regexp.MustCompile(`(?:^|/)vendor/(?:github\.com|golang\.org/x|google\.golang\.org|gopkg\.in|istio\.io|k8s\.io|sigs\.k8s\.io)(?:/.*)?$`),

-				// ----------- Java files -----------
-				// Gradle
-				regexp.MustCompile(`(?:^|/)gradlew(?:\.bat)?$`),
-				regexp.MustCompile(`(?:^|/)gradle\.lockfile$`),
-				regexp.MustCompile(`(?:^|/)mvnw(?:\.cmd)?$`),
-				regexp.MustCompile(`(?:^|/)\.mvn/wrapper/MavenWrapperDownloader\.java$`),
+					// ----------- Java files -----------
+					// Gradle
+					regexp.MustCompile(`(?:^|/)gradlew(?:\.bat)?$`),
+					regexp.MustCompile(`(?:^|/)gradle\.lockfile$`),
+					regexp.MustCompile(`(?:^|/)mvnw(?:\.cmd)?$`),
+					regexp.MustCompile(`(?:^|/)\.mvn/wrapper/MavenWrapperDownloader\.java$`),

-				// ----------- JavaScript files -----------
-				// Dependencies and lock files.
-				regexp.MustCompile(`(?:^|/)node_modules(?:/.*)?$`),
-				regexp.MustCompile(`(?:^|/)(?:deno\.lock|npm-shrinkwrap\.json|package-lock\.json|pnpm-lock\.yaml|yarn\.lock)$`),
-				regexp.MustCompile(`(?:^|/)bower_components(?:/.*)?$`),
-				// TODO: Add more common static assets, such as swagger-ui.
-				regexp.MustCompile(`(?:^|/)(?:angular|bootstrap|jquery(?:-?ui)?|plotly|swagger-?ui)[a-zA-Z0-9.-]*(?:\.min)?\.js(?:\.map)?$`),
-				regexp.MustCompile(`(?:^|/)javascript\.json$`),
+					// ----------- JavaScript files -----------
+					// Dependencies and lock files.
+					regexp.MustCompile(`(?:^|/)node_modules(?:/.*)?$`),
+					regexp.MustCompile(`(?:^|/)(?:deno\.lock|npm-shrinkwrap\.json|package-lock\.json|pnpm-lock\.yaml|yarn\.lock)$`),
+					regexp.MustCompile(`(?:^|/)bower_components(?:/.*)?$`),
+					// TODO: Add more common static assets, such as swagger-ui.
+					regexp.MustCompile(`(?:^|/)(?:angular|bootstrap|jquery(?:-?ui)?|plotly|swagger-?ui)[a-zA-Z0-9.-]*(?:\.min)?\.js(?:\.map)?$`),
+					regexp.MustCompile(`(?:^|/)javascript\.json$`),

-				// ----------- Python files -----------
-				// Dependencies and lock files.
-				regexp.MustCompile(`(?:^|/)(?:Pipfile|poetry)\.lock$`),
-				// Virtual environments
-				regexp.MustCompile(`(?i)(?:^|/)(?:v?env|virtualenv)/lib(?:64)?(?:/.*)?$`),
-				regexp.MustCompile(`(?i)(?:^|/)(?:lib(?:64)?/python[23](?:\.\d{1,2})+|python/[23](?:\.\d{1,2})+/lib(?:64)?)(?:/.*)?$`),
-				// dist-info directory (https://py-pkgs.org/04-package-structure.html#building-sdists-and-wheels)
-				regexp.MustCompile(`(?i)(?:^|/)[a-z0-9_.]+-[0-9.]+\.dist-info(?:/.+)?$`),
+					// ----------- Python files -----------
+					// Dependencies and lock files.
+					regexp.MustCompile(`(?:^|/)(?:Pipfile|poetry)\.lock$`),
+					// Virtual environments
+					regexp.MustCompile(`(?i)(?:^|/)(?:v?env|virtualenv)/lib(?:64)?(?:/.*)?$`),
+					regexp.MustCompile(`(?i)(?:^|/)(?:lib(?:64)?/python[23](?:\.\d{1,2})+|python/[23](?:\.\d{1,2})+/lib(?:64)?)(?:/.*)?$`),
+					// dist-info directory (https://py-pkgs.org/04-package-structure.html#building-sdists-and-wheels)
+					regexp.MustCompile(`(?i)(?:^|/)[a-z0-9_.]+-[0-9.]+\.dist-info(?:/.+)?$`),

-				// ----------- Ruby files -----------
-				regexp.MustCompile(`(?:^|/)vendor/(?:bundle|ruby)(?:/.*?)?$`),
-				regexp.MustCompile(`\.gem$`), // tar archive
+					// ----------- Ruby files -----------
+					regexp.MustCompile(`(?:^|/)vendor/(?:bundle|ruby)(?:/.*?)?$`),
+					regexp.MustCompile(`\.gem$`), // tar archive

-				// Misc
-				regexp.MustCompile(`verification-metadata\.xml`),
-				regexp.MustCompile(`Database.refactorlog`),
-				// regexp.MustCompile(`vendor`),
-			},
-			StopWords: []string{
-				"abcdefghijklmnopqrstuvwxyz", // character range
-				// ----------- Secrets -----------
-				// Checkmarx client secret. (https://github.com/checkmarx-ts/checkmarx-python-sdk/blob/86560f6e2a3e46d16322101294da10d5d190312d/README.md?plain=1#L56)
-				"014df517-39d1-4453-b7b3-9930c563627c",
+					// Misc
+					regexp.MustCompile(`verification-metadata\.xml`),
+					regexp.MustCompile(`Database.refactorlog`),
+					// regexp.MustCompile(`vendor`),
+				},
+				StopWords: []string{
+					"abcdefghijklmnopqrstuvwxyz", // character range
+					// ----------- Secrets -----------
+					// Checkmarx client secret. (https://github.com/checkmarx-ts/checkmarx-python-sdk/blob/86560f6e2a3e46d16322101294da10d5d190312d/README.md?plain=1#L56)
+					"014df517-39d1-4453-b7b3-9930c563627c",
+				},
			},
		},
	}

+ 32 - 16
cmd/generate/config/base/config_test.go

@@ -75,18 +75,22 @@ var allowlistRegexTests = map[string]struct {

func TestConfigAllowlistRegexes(t *testing.T) {
	cfg := CreateGlobalConfig()
-	allowlist := cfg.Allowlist
+	allowlists := cfg.Allowlists
	for name, cases := range allowlistRegexTests {
		t.Run(name, func(t *testing.T) {
			for _, c := range cases.invalid {
-				if !allowlist.RegexAllowed(c) {
-					t.Errorf("invalid value not marked as allowed: %s", c)
+				for _, a := range allowlists {
+					if !a.RegexAllowed(c) {
+						t.Errorf("invalid value not marked as allowed: %s", c)
+					}
				}
			}

			for _, c := range cases.valid {
-				if allowlist.RegexAllowed(c) {
-					t.Errorf("valid value marked as allowed: %s", c)
+				for _, a := range allowlists {
+					if a.RegexAllowed(c) {
+						t.Errorf("valid value marked as allowed: %s", c)
+					}
				}
			}
		})
@@ -95,15 +99,19 @@ func TestConfigAllowlistRegexes(t *testing.T) {

func BenchmarkConfigAllowlistRegexes(b *testing.B) {
	cfg := CreateGlobalConfig()
-	allowlist := cfg.Allowlist
+	allowlists := cfg.Allowlists
	for n := 0; n < b.N; n++ {
		for _, cases := range allowlistRegexTests {
			for _, c := range cases.invalid {
-				allowlist.RegexAllowed(c)
+				for _, a := range allowlists {
+					a.RegexAllowed(c)
+				}
			}

			for _, c := range cases.valid {
-				allowlist.RegexAllowed(c)
+				for _, a := range allowlists {
+					a.RegexAllowed(c)
+				}
			}
		}
	}
@@ -152,18 +160,22 @@ var allowlistPathsTests = map[string]struct {

func TestConfigAllowlistPaths(t *testing.T) {
	cfg := CreateGlobalConfig()
-	allowlist := cfg.Allowlist
+	allowlists := cfg.Allowlists
	for name, cases := range allowlistPathsTests {
		t.Run(name, func(t *testing.T) {
			for _, c := range cases.invalid {
-				if !allowlist.PathAllowed(c) {
-					t.Errorf("invalid path not marked as allowed: %s", c)
+				for _, a := range allowlists {
+					if !a.PathAllowed(c) {
+						t.Errorf("invalid path not marked as allowed: %s", c)
+					}
				}
			}

			for _, c := range cases.valid {
-				if allowlist.PathAllowed(c) {
-					t.Errorf("valid path marked as allowed: %s", c)
+				for _, a := range allowlists {
+					if a.PathAllowed(c) {
+						t.Errorf("valid path marked as allowed: %s", c)
+					}
				}
			}
		})
@@ -172,15 +184,19 @@ func TestConfigAllowlistPaths(t *testing.T) {

func BenchmarkConfigAllowlistPaths(b *testing.B) {
	cfg := CreateGlobalConfig()
-	allowlist := cfg.Allowlist
+	allowlists := cfg.Allowlists
	for n := 0; n < b.N; n++ {
		for _, cases := range allowlistPathsTests {
			for _, c := range cases.invalid {
-				allowlist.PathAllowed(c)
+				for _, a := range allowlists {
+					a.PathAllowed(c)
+				}
			}

			for _, c := range cases.valid {
-				allowlist.PathAllowed(c)
+				for _, a := range allowlists {
+					a.PathAllowed(c)
+				}
			}
		}
	}

+ 5 - 1
cmd/generate/config/main.go

@@ -1,10 +1,11 @@
package main

import (
-	"golang.org/x/exp/slices"
	"os"
	"text/template"

+	"golang.org/x/exp/slices"
+
	"github.com/zricethezav/gitleaks/v8/cmd/generate/config/base"
	"github.com/zricethezav/gitleaks/v8/cmd/generate/config/rules"
	"github.com/zricethezav/gitleaks/v8/config"
@@ -25,6 +26,7 @@ func main() {
	gitleaksConfigPath := os.Args[1]

	configRules := []*config.Rule{
+		rules.OnePasswordSecretKey(),
		rules.OnePasswordServiceAccountToken(),
		rules.AdafruitAPIKey(),
		rules.AdobeClientID(),
@@ -47,6 +49,7 @@ func main() {
		rules.Beamer(),
		rules.CodecovAccessToken(),
		rules.CoinbaseAccessToken(),
+		rules.ClickHouseCloud(),
		rules.Clojars(),
		rules.CloudflareAPIKey(),
		rules.CloudflareGlobalAPIKey(),
@@ -165,6 +168,7 @@ func main() {
		rules.OktaAccessToken(),
		rules.OpenAI(),
		rules.OpenshiftUserToken(),
+		rules.PerplexityAPIKey(),
		rules.PlaidAccessID(),
		rules.PlaidSecretKey(),
		rules.PlaidAccessToken(),

+ 40 - 1
cmd/generate/config/rules/1password.go

@@ -33,10 +33,49 @@ export OP_SERVICE_ACCOUNT_TOKEN=ops_eyJzaWduSW5BZGRyZXNzIjoibXkuMXBhc3N3b3JkLmNv
          serviceAccountToken:
            fn::secret: ops_eyJzaWduSW5B..[Redacted]`,
		`: To start using this service account, run the following command:
-: 
+:
: export OP_SERVICE_ACCOUNT_TOKEN=ops_eyJzaWduSW5BZGRyZXNzIjoiaHR0cHM6...`,
		// Low entropy.
		`ops_eyJxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`,
	}
	return utils.Validate(r, tps, fps)
}
+
+// Reference:
+// - https://1passwordstatic.com/files/security/1password-white-paper.pdf
+func OnePasswordSecretKey() *config.Rule {
+	// 1Password secret keys include several hyphens but these are only for readability
+	// and are stripped during 1Password login. This means that the following are technically
+	// the same valid key:
+	//   - A3ASWWYB798JRYLJVD423DC286TVMH43EB
+	//   - A-3-A-S-W-W-Y-B-7-9-8-J-R-Y-L-J-V-D-4-2-3-D-C-2-8-6-T-V-M-H-4-3-E-B
+	// But in practice, when these keys are added to a vault, exported in an emergency kit, or
+	// copied, they have hyphens that follow one of two patterns I can find:
+	//   - A3-ASWWYB-798JRY-LJVD4-23DC2-86TVM-H43EB (every key I've generated has this pattern)
+	//   - A3-ASWWYB-798JRYLJVD4-23DC2-86TVM-H43EB  (the whitepaper includes this example, which could just be a typo)
+	// To avoid a complicated regex that checks for every possible situation, it's probably best
+	// to scan for these two patterns.
+	r := config.Rule{
+		Description: "Uncovered a possible 1Password secret key, potentially compromising access to secrets in vaults.",
+		RuleID:      "1password-secret-key",
+		Regex:       regexp.MustCompile(`\bA3-[A-Z0-9]{6}-(?:(?:[A-Z0-9]{11})|(?:[A-Z0-9]{6}-[A-Z0-9]{5}))-[A-Z0-9]{5}-[A-Z0-9]{5}-[A-Z0-9]{5}\b`),
+		Entropy:     3.8,
+		Keywords:    []string{"A3-"},
+	}
+
+	// validate
+	tps := utils.GenerateSampleSecrets("1password", secrets.NewSecret(`A3-[A-Z0-9]{6}-[A-Z0-9]{11}-[A-Z0-9]{5}-[A-Z0-9]{5}-[A-Z0-9]{5}`))
+	tps = append(tps, utils.GenerateSampleSecrets("1password", secrets.NewSecret(`A3-[A-Z0-9]{6}-[A-Z0-9]{6}-[A-Z0-9]{5}-[A-Z0-9]{5}-[A-Z0-9]{5}-[A-Z0-9]{5}`))...)
+	tps = append(tps,
+		// from whitepaper
+		`A3-ASWWYB-798JRYLJVD4-23DC2-86TVM-H43EB`,
+		`A3-ASWWYB-798JRY-LJVD4-23DC2-86TVM-H43EB`,
+	)
+	fps := []string{
+		// low entropy
+		`A3-XXXXXX-XXXXXXXXXXX-XXXXX-XXXXX-XXXXX`,
+		// lowercase
+		`A3-xXXXXX-XXXXXX-XXXXX-XXXXX-XXXXX-XXXXX`,
+	}
+	return utils.Validate(r, tps, fps)
+}
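
A quick way to sanity-check the new pattern outside the rule harness is to compile it with the standard library and feed it the whitepaper-style examples from the true-positive list. A sketch; the real rule additionally applies the 3.8 entropy threshold, which a bare regex match does not capture:

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Same pattern as the 1password-secret-key rule above.
	re := regexp.MustCompile(`\bA3-[A-Z0-9]{6}-(?:(?:[A-Z0-9]{11})|(?:[A-Z0-9]{6}-[A-Z0-9]{5}))-[A-Z0-9]{5}-[A-Z0-9]{5}-[A-Z0-9]{5}\b`)

	fmt.Println(re.MatchString("A3-ASWWYB-798JRYLJVD4-23DC2-86TVM-H43EB"))  // true: whitepaper hyphenation
	fmt.Println(re.MatchString("A3-ASWWYB-798JRY-LJVD4-23DC2-86TVM-H43EB")) // true: emergency-kit hyphenation
	fmt.Println(re.MatchString("A3-xXXXXX-XXXXXX-XXXXX-XXXXX-XXXXX-XXXXX")) // false: lowercase character breaks it
}
```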

+ 30 - 0
cmd/generate/config/rules/clickhouse.go

@@ -0,0 +1,30 @@
+package rules
+
+import (
+	"github.com/zricethezav/gitleaks/v8/cmd/generate/config/utils"
+	"github.com/zricethezav/gitleaks/v8/cmd/generate/secrets"
+	"github.com/zricethezav/gitleaks/v8/config"
+	"github.com/zricethezav/gitleaks/v8/regexp"
+)
+
+func ClickHouseCloud() *config.Rule {
+	// define rule
+	r := config.Rule{
+		RuleID:      "clickhouse-cloud-api-secret-key",
+		Description: "Identified a pattern that may indicate clickhouse cloud API secret key, risking unauthorized clickhouse cloud api access and data breaches on ClickHouse Cloud platforms.",
+		Regex:       regexp.MustCompile(`\b(4b1d[A-Za-z0-9]{38})\b`),
+		Entropy:     3,
+		Keywords: []string{
+			"4b1d", // Prefix
+		},
+	}
+
+	// validate
+	tps := utils.GenerateSampleSecrets("ClickHouse", "4b1dbRdW3rOcB7xLthrM4BTBGK1qPLkHigpN1bXD6z")
+	tps = append(tps, utils.GenerateSampleSecrets("ClickHouse", "4b1d"+secrets.NewSecret("[A-Za-z0-9]{38}"))...)
+	fps := []string{
+		`key = 4b1dXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX`,    // Low entropy
+		`key = adf4b1dbRdW3rOcB7xLthrM4BTBGK1qPLkHigpN1bXD6z`, // Not start of a word
+	}
+	return utils.Validate(r, tps, fps)
+}

+ 13 - 11
cmd/generate/config/rules/config.tmpl

@@ -13,21 +13,22 @@

title = "{{.Title}}"

-[allowlist]
-description = "{{.Allowlist.Description}}"
-{{- with .Allowlist.RegexTarget }}
-regexTarget = "{{ . }}"{{ end -}}
-{{- with .Allowlist.Regexes }}
-regexes = [{{ range $i, $regex := . }}
-    '''{{ $regex }}''',{{ end }}
+{{ with .Allowlists }}{{ range $i, $allowlist := . }}{{ if or $allowlist.Regexes $allowlist.Paths $allowlist.Commits $allowlist.StopWords }}# TODO: change to [[allowlists]]{{println}}[allowlist]
+{{- with .Description }}{{println}}description = "{{ . }}"{{ end }}
+{{- with .MatchCondition }}{{println}}condition = "{{ .String }}"{{ end }}
+{{- with .Commits -}}{{println}}commits = [
+    {{ range $j, $commit := . }}"{{ $commit }}",{{ end }}
]{{ end }}
-{{- with .Allowlist.Paths }}
-paths = [{{ range $i, $path := . }}
+{{- with .Paths }}{{println}}paths = [{{ range $j, $path := . }}
    '''{{ $path }}''',{{ end }}
]{{ end }}
-{{- with .Allowlist.StopWords }}{{println}}stopwords = [{{ range $j, $stopword := . }}
+{{- if and .RegexTarget .Regexes }}{{println}}regexTarget = "{{ .RegexTarget }}"{{ end -}}
+{{- with .Regexes }}{{println}}regexes = [{{ range $i, $regex := . }}
+    '''{{ $regex }}''',{{ end }}
+]{{ end }}
+{{- with .StopWords }}{{println}}stopwords = [{{ range $j, $stopword := . }}
    "{{ $stopword }}",{{ end }}
-]{{ end }}{{println}}
+]{{ end }}{{ end }}{{ end }}{{ end }}{{println}}

{{- range $i, $rule := .Rules }}{{println}}[[rules]]
id = "{{$rule.RuleID}}"
@@ -51,6 +52,7 @@ tags = [
    {{ range $j, $tag := . }}"{{ $tag }}",{{ end }}
]{{ end }}
{{- with $rule.Allowlists }}{{ range $i, $allowlist := . }}{{ if or $allowlist.Regexes $allowlist.Paths $allowlist.Commits $allowlist.StopWords }}{{println}}[[rules.allowlists]]
+{{- with .Description }}{{println}}description = "{{ . }}"{{ end }}
{{- with .MatchCondition }}{{println}}condition = "{{ .String }}"{{ end }}
{{- with .Commits -}}{{println}}commits = [
    {{ range $j, $commit := . }}"{{ $commit }}",{{ end }}

+ 2 - 4
cmd/generate/config/rules/gcp.go

@@ -30,10 +30,8 @@ func GCPAPIKey() *config.Rule {
		RuleID:      "gcp-api-key",
		Description: "Uncovered a GCP API key, which could lead to unauthorized access to Google Cloud services and data breaches.",
		Regex:       utils.GenerateUniqueTokenRegex(`AIza[\w-]{35}`, false),
-		Entropy:     3.0,
-		Keywords: []string{
-			"AIza",
-		},
+		Entropy:     4,
+		Keywords:    []string{"AIza"},
		Allowlists: []*config.Allowlist{
			{
				Regexes: []*regexp.Regexp{
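
Raising Entropy from 3.0 to 4 tightens the Shannon-entropy gate a candidate match must clear before it is reported. A small standalone sketch of the underlying measure (gitleaks has its own implementation in the detect package; this version is only for illustration, and the sample keys are made up):

```go
package main

import (
	"fmt"
	"math"
)

// shannonEntropy returns the Shannon entropy (bits per character) of s,
// the quantity a rule's Entropy threshold is compared against.
func shannonEntropy(s string) float64 {
	counts := make(map[rune]int)
	total := 0
	for _, r := range s {
		counts[r]++
		total++
	}
	var h float64
	for _, c := range counts {
		p := float64(c) / float64(total)
		h -= p * math.Log2(p)
	}
	return h
}

func main() {
	// A placeholder-looking key stays well below 4; a random-looking key clears it.
	fmt.Printf("%.2f\n", shannonEntropy("AIzaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")) // well below 4
	fmt.Printf("%.2f\n", shannonEntropy("AIzaSyD9X2kq8mN3pQ7rT5vW1yZ4bC6dE8fGhJk")) // comfortably above 4
}
```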

+ 3 - 3
cmd/generate/config/rules/kubernetes.go

@@ -17,7 +17,7 @@ func KubernetesSecret() *config.Rule {
	// - valid base64 characters
	// - longer than 10 characters (no "YmFyCg==")
	//language=regexp
-	dataPat := `\bdata:(?:.|\s){0,100}?\s+([\w.-]+:(?:[ \t]*(?:\||>[-+]?)\s+)?[ \t]*(?:["']?[a-z0-9+/]{10,}={0,3}["']?|\{\{[ \t\w"|$:=,.-]+}}|""|''))`
+	dataPat := `\bdata:(?s:.){0,100}?\s+([\w.-]+:(?:[ \t]*(?:\||>[-+]?)\s+)?[ \t]*(?:["']?[a-z0-9+/]{10,}={0,3}["']?|\{\{[ \t\w"|$:=,.-]+}}|""|''))`

	// define rule
	r := config.Rule{
@@ -25,7 +25,7 @@ func KubernetesSecret() *config.Rule {
		Description: "Possible Kubernetes Secret detected, posing a risk of leaking credentials/tokens from your deployments",
		Regex: regexp.MustCompile(fmt.Sprintf(
			//language=regexp
-			`(?i)(?:%s(?:.|\s){0,200}?%s|%s(?:.|\s){0,200}?%s)`, kindPat, dataPat, dataPat, kindPat)),
+			`(?i)(?:%s(?s:.){0,200}?%s|%s(?s:.){0,200}?%s)`, kindPat, dataPat, dataPat, kindPat)),
		Keywords: []string{
			"secret",
		},
@@ -45,7 +45,7 @@ func KubernetesSecret() *config.Rule {
				// Avoid overreach between directives.
				RegexTarget: "match",
				Regexes: []*regexp.Regexp{
-					regexp.MustCompile(`(kind:(?:.|\s)+\n---\n(?:.|\s)+\bdata:|data:(?:.|\s)+\n---\n(?:.|\s)+\bkind:)`),
+					regexp.MustCompile(`(kind:(?s:.)+\n---\n(?s:.)+\bdata:|data:(?s:.)+\n---\n(?s:.)+\bkind:)`),
				},
			},
		},
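
The switch from (?:.|\s) to (?s:.) is behavior-preserving: both match any character including newlines, but the s-flag form expresses it without an alternation. A minimal illustration under standard Go regexp semantics (the pattern below is a simplified stand-in, not the full rule):

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	manifest := "kind: Secret\nmetadata: {}\ndata:"

	oldStyle := regexp.MustCompile(`kind:(?:.|\s){0,30}?data:`) // alternation of "any" and whitespace
	newStyle := regexp.MustCompile(`kind:(?s:.){0,30}?data:`)   // s flag: `.` also matches \n

	fmt.Println(oldStyle.MatchString(manifest), newStyle.MatchString(manifest)) // true true
}
```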

+ 27 - 0
cmd/generate/config/rules/perplexity.go

@@ -0,0 +1,27 @@
+package rules
+
+import (
+	"regexp"
+
+	"github.com/zricethezav/gitleaks/v8/cmd/generate/config/utils"
+
+	"github.com/zricethezav/gitleaks/v8/config"
+)
+
+func PerplexityAPIKey() *config.Rule {
+	// Define Rule
+	r := config.Rule{
+		RuleID:      "perplexity-api-key",
+		Description: "Detected a Perplexity API key, which could lead to unauthorized access to Perplexity AI services and data exposure.",
+		Regex:       regexp.MustCompile(`\b(pplx-[a-zA-Z0-9]{48})(?:[\x60'"\s;]|\\[nr]|$|\b)`),
+		Keywords:    []string{"pplx-"},
+		Entropy:     4.0,
+	}
+
+	// validate
+	tps := utils.GenerateSampleSecrets("perplexity", "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'")
+	fps := []string{
+		"PERPLEXITY_API_KEY=pplx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+	}
+	return utils.Validate(r, tps, fps)
+}

+ 2 - 0
cmd/git.go

@@ -42,6 +42,8 @@ func runGit(cmd *cobra.Command, args []string) {

	// setup config (aka, the thing that defines rules)
	initConfig(source)
+	initDiagnostics()
+
	cfg := Config(cmd)

	// create detector

+ 2 - 0
cmd/protect.go

@@ -33,6 +33,8 @@ func runProtect(cmd *cobra.Command, args []string) {

	// setup config (aka, the thing that defines rules)
	initConfig(source)
+	initDiagnostics()
+
	cfg := Config(cmd)

	// create detector

+ 47 - 5
cmd/root.go

@@ -38,11 +38,17 @@ order of precedence:
4. (target path)/.gitleaks.toml
If none of the four options are used, then gitleaks will use the default config`

-var rootCmd = &cobra.Command{
-	Use:     "gitleaks",
-	Short:   "Gitleaks scans code, past or present, for secrets",
-	Version: Version,
-}
+var (
+	rootCmd = &cobra.Command{
+		Use:     "gitleaks",
+		Short:   "Gitleaks scans code, past or present, for secrets",
+		Version: Version,
+	}
+
+	// diagnostics manager is global to ensure it can be started before a scan begins
+	// and stopped after a scan completes
+	diagnosticsManager *DiagnosticsManager
+)

const (
	BYTE     = 1.0
@@ -71,6 +77,10 @@ func init() {
	rootCmd.PersistentFlags().StringP("gitleaks-ignore-path", "i", ".", "path to .gitleaksignore file or folder containing one")
	rootCmd.PersistentFlags().Int("max-decode-depth", 0, "allow recursive decoding up to this depth (default \"0\", no decoding is done)")

+	// Add diagnostics flags
+	rootCmd.PersistentFlags().String("diagnostics", "", "enable diagnostics (comma-separated list: cpu,mem,trace). cpu=CPU profiling, mem=memory profiling, trace=execution tracing")
+	rootCmd.PersistentFlags().String("diagnostics-dir", "", "directory to store diagnostics output files (defaults to current directory)")
+
	err := viper.BindPFlag("config", rootCmd.PersistentFlags().Lookup("config"))
	if err != nil {
		logging.Fatal().Msgf("err binding config %s", err.Error())
@@ -169,6 +179,33 @@ func initConfig(source string) {
	}
}

+func initDiagnostics() {
+	// Initialize diagnostics manager
+	diagnosticsFlag, err := rootCmd.PersistentFlags().GetString("diagnostics")
+	if err != nil {
+		logging.Fatal().Err(err).Msg("Error getting diagnostics flag")
+	}
+
+	diagnosticsDir, err := rootCmd.PersistentFlags().GetString("diagnostics-dir")
+	if err != nil {
+		logging.Fatal().Err(err).Msg("Error getting diagnostics-dir flag")
+	}
+
+	var diagErr error
+	diagnosticsManager, diagErr = NewDiagnosticsManager(diagnosticsFlag, diagnosticsDir)
+	if diagErr != nil {
+		logging.Fatal().Err(diagErr).Msg("Error initializing diagnostics")
+	}
+
+	if diagnosticsManager.Enabled {
+		logging.Info().Msg("Starting diagnostics...")
+		if diagErr := diagnosticsManager.StartDiagnostics(); diagErr != nil {
+			logging.Fatal().Err(diagErr).Msg("Failed to start diagnostics")
+		}
+	}
+}
+
func Execute() {
	if err := rootCmd.Execute(); err != nil {
		if strings.Contains(err.Error(), "unknown flag") {
@@ -379,6 +416,11 @@ func bytesConvert(bytes uint64) string {
}

func findingSummaryAndExit(detector *detect.Detector, findings []report.Finding, exitCode int, start time.Time, err error) {
+	if diagnosticsManager.Enabled {
+		logging.Debug().Msg("Finalizing diagnostics...")
+		diagnosticsManager.StopDiagnostics()
+	}
+
	totalBytes := detector.TotalBytes.Load()
	bytesMsg := fmt.Sprintf("scanned ~%d bytes (%s)", totalBytes, bytesConvert(totalBytes))
	if err == nil {

+ 2 - 0
cmd/stdin.go

@@ -25,6 +25,8 @@ func runStdIn(cmd *cobra.Command, _ []string) {

	// setup config (aka, the thing that defines rules)
	initConfig(".")
+	initDiagnostics()
+
	cfg := Config(cmd)

	// create detector

+ 33 - 34
config/allowlist.go

@@ -59,6 +59,39 @@ type Allowlist struct {
	validated bool
}

+func (a *Allowlist) Validate() error {
+	if a.validated {
+		return nil
+	}
+
+	// Disallow empty allowlists.
+	if len(a.Commits) == 0 &&
+		len(a.Paths) == 0 &&
+		len(a.Regexes) == 0 &&
+		len(a.StopWords) == 0 {
+		return fmt.Errorf("must contain at least one check for: commits, paths, regexes, or stopwords")
+	}
+
+	// Deduplicate commits and stopwords.
+	if len(a.Commits) > 0 {
+		uniqueCommits := make(map[string]struct{})
+		for _, commit := range a.Commits {
+			uniqueCommits[commit] = struct{}{}
+		}
+		a.Commits = maps.Keys(uniqueCommits)
+	}
+	if len(a.StopWords) > 0 {
+		uniqueStopwords := make(map[string]struct{})
+		for _, stopWord := range a.StopWords {
+			uniqueStopwords[stopWord] = struct{}{}
+		}
+		a.StopWords = maps.Keys(uniqueStopwords)
+	}
+
+	a.validated = true
+	return nil
+}
+
// CommitAllowed returns true if the commit is allowed to be ignored.
func (a *Allowlist) CommitAllowed(c string) (bool, string) {
	if a == nil || c == "" {
@@ -102,37 +135,3 @@ func (a *Allowlist) ContainsStopWord(s string) (bool, string) {
	}
	return false, ""
}
-
-func (a *Allowlist) Validate() error {
-	if a.validated {
-		return nil
-	}
-
-	// Disallow empty allowlists.
-	if len(a.Commits) == 0 &&
-		len(a.Paths) == 0 &&
-		len(a.Regexes) == 0 &&
-		len(a.StopWords) == 0 {
-		return fmt.Errorf("[[rules.allowlists]] must contain at least one check for: commits, paths, regexes, or stopwords")
-	}
-
-	// Deduplicate commits and stopwords.
-	if len(a.Commits) > 0 {
-		uniqueCommits := make(map[string]struct{})
-		for _, commit := range a.Commits {
-			uniqueCommits[commit] = struct{}{}
-		}
-		a.Commits = maps.Keys(uniqueCommits)
-	}
-
-	if len(a.StopWords) > 0 {
-		uniqueStopwords := make(map[string]struct{})
-		for _, stopWord := range a.StopWords {
-			uniqueStopwords[stopWord] = struct{}{}
-		}
-		a.StopWords = maps.Keys(uniqueStopwords)
-	}
-
-	a.validated = true
-	return nil
-}
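
With the error message no longer hard-coded to [[rules.allowlists]], Validate reads the same whether it fires for a rule-level or a global allowlist. A minimal sketch of the failure mode it guards against, using the exported API shown above (illustrative usage, not from the commit):

```go
package main

import (
	"fmt"

	"github.com/zricethezav/gitleaks/v8/config"
)

func main() {
	// An allowlist with no commits, paths, regexes, or stopwords is rejected.
	empty := &config.Allowlist{}
	if err := empty.Validate(); err != nil {
		fmt.Println(err) // must contain at least one check for: commits, paths, regexes, or stopwords
	}
}
```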

+ 1 - 1
config/allowlist_test.go

@@ -105,7 +105,7 @@ func TestValidate(t *testing.T) {
	}{
		"empty conditions": {
			input:   Allowlist{},
-			wantErr: errors.New("[[rules.allowlists]] must contain at least one check for: commits, paths, regexes, or stopwords"),
+			wantErr: errors.New("must contain at least one check for: commits, paths, regexes, or stopwords"),
		},
		"deduplicated commits and stopwords": {
			input: Allowlist{

+ 153 - 116
config/config.go

@@ -2,6 +2,7 @@ package config
 
 
 import (
 import (
 	_ "embed"
 	_ "embed"
+	"errors"
 	"fmt"
 	"fmt"
 	"sort"
 	"sort"
 	"strings"
 	"strings"
@@ -12,12 +13,14 @@ import (
 	"github.com/zricethezav/gitleaks/v8/regexp"
 	"github.com/zricethezav/gitleaks/v8/regexp"
 )
 )
 
 
-//go:embed gitleaks.toml
-var DefaultConfig string
+var (
+	//go:embed gitleaks.toml
+	DefaultConfig string
 
 
-// use to keep track of how many configs we can extend
-// yea I know, globals bad
-var extendDepth int
+	// use to keep track of how many configs we can extend
+	// yea I know, globals bad
+	extendDepth int
+)
 
 
 const maxExtendDepth = 2
 const maxExtendDepth = 2
 
 
@@ -25,29 +28,28 @@ const maxExtendDepth = 2
 // to parse the config file. This struct does not include regular expressions.
 // to parse the config file. This struct does not include regular expressions.
 // It is used as an intermediary to convert the Viper config to the Config struct.
 // It is used as an intermediary to convert the Viper config to the Config struct.
 type ViperConfig struct {
 type ViperConfig struct {
+	Title       string
 	Description string
 	Description string
 	Extend      Extend
 	Extend      Extend
 	Rules       []struct {
 	Rules       []struct {
 		ID          string
 		ID          string
 		Description string
 		Description string
+		Path        string
 		Regex       string
 		Regex       string
 		SecretGroup int
 		SecretGroup int
 		Entropy     float64
 		Entropy     float64
 		Keywords    []string
 		Keywords    []string
-		Path        string
 		Tags        []string
 		Tags        []string
 
 
-		// Deprecated: this is a shim for backwards-compatibility. It should be removed in 9.x.
+		// Deprecated: this is a shim for backwards-compatibility.
+		// TODO: Remove this in 9.x.
 		AllowList  *viperRuleAllowlist
 		AllowList  *viperRuleAllowlist
-		Allowlists []viperRuleAllowlist
-	}
-	Allowlist struct {
-		Commits     []string
-		Paths       []string
-		RegexTarget string
-		Regexes     []string
-		StopWords   []string
+		Allowlists []*viperRuleAllowlist
 	}
+	// Deprecated: this is a shim for backwards-compatibility.
+	// TODO: Remove this in 9.x.
+	AllowList  *viperGlobalAllowlist
+	Allowlists []*viperGlobalAllowlist
 }
 
 type viperRuleAllowlist struct {
@@ -60,6 +62,11 @@ type viperRuleAllowlist struct {
 	StopWords   []string
 }
 
+type viperGlobalAllowlist struct {
+	TargetRules        []string
+	viperRuleAllowlist `mapstructure:",squash"`
+}
+
 // Config is a configuration struct that contains rules and an allowlist if present.
 type Config struct {
 	Title       string
@@ -67,11 +74,10 @@ type Config struct {
 	Path        string
 	Description string
 	Rules       map[string]Rule
-	Allowlist   *Allowlist
 	Keywords    map[string]struct{}
-
 	// used to keep sarif results consistent
 	OrderedRules []string
+	Allowlists   []*Allowlist
 }
 
 // Extend is a struct that allows users to define how they want their
@@ -85,13 +91,24 @@ type Extend struct {
 
 func (vc *ViperConfig) Translate() (Config, error) {
 	var (
-		keywords     = make(map[string]struct{})
-		orderedRules []string
-		rulesMap     = make(map[string]Rule)
+		keywords       = make(map[string]struct{})
+		orderedRules   []string
+		rulesMap       = make(map[string]Rule)
+		ruleAllowlists = make(map[string][]*Allowlist)
 	)
 
 	// Validate individual rules.
 	for _, vr := range vc.Rules {
+		var (
+			pathPat  *regexp.Regexp
+			regexPat *regexp.Regexp
+		)
+		if vr.Path != "" {
+			pathPat = regexp.MustCompile(vr.Path)
+		}
+		if vr.Regex != "" {
+			regexPat = regexp.MustCompile(vr.Regex)
+		}
 		if vr.Keywords == nil {
 			vr.Keywords = []string{}
 		} else {
@@ -101,115 +118,85 @@ func (vc *ViperConfig) Translate() (Config, error) {
 				vr.Keywords[i] = keyword
 			}
 		}
-
 		if vr.Tags == nil {
 			vr.Tags = []string{}
 		}
-
-		var configRegex *regexp.Regexp
-		var configPathRegex *regexp.Regexp
-		if vr.Regex != "" {
-			configRegex = regexp.MustCompile(vr.Regex)
-		}
-		if vr.Path != "" {
-			configPathRegex = regexp.MustCompile(vr.Path)
-		}
-
 		cr := Rule{
 			RuleID:      vr.ID,
 			Description: vr.Description,
-			Regex:       configRegex,
+			Regex:       regexPat,
 			SecretGroup: vr.SecretGroup,
 			Entropy:     vr.Entropy,
-			Path:        configPathRegex,
+			Path:        pathPat,
 			Keywords:    vr.Keywords,
 			Tags:        vr.Tags,
 		}
-		// Parse the allowlist, including the older format for backwards compatibility.
+
+		// Parse the rule allowlists, including the older format for backwards compatibility.
 		if vr.AllowList != nil {
+			// TODO: Remove this in v9.
 			if len(vr.Allowlists) > 0 {
 				return Config{}, fmt.Errorf("%s: [rules.allowlist] is deprecated, it cannot be used alongside [[rules.allowlist]]", cr.RuleID)
 			}
-			vr.Allowlists = append(vr.Allowlists, *vr.AllowList)
+			vr.Allowlists = append(vr.Allowlists, vr.AllowList)
 		}
 		for _, a := range vr.Allowlists {
-			var condition AllowlistMatchCondition
-			c := strings.ToUpper(a.Condition)
-			switch c {
-			case "AND", "&&":
-				condition = AllowlistMatchAnd
-			case "", "OR", "||":
-				condition = AllowlistMatchOr
-			default:
-				return Config{}, fmt.Errorf("%s: unknown allowlist condition '%s' (expected 'and', 'or')", cr.RuleID, c)
-			}
-
-			// Validate the target.
-			if a.RegexTarget != "" {
-				switch a.RegexTarget {
-				case "secret":
-					a.RegexTarget = ""
-				case "match", "line":
-					// do nothing
-				default:
-					return Config{}, fmt.Errorf("%s: unknown allowlist |regexTarget| '%s' (expected 'match', 'line')", cr.RuleID, a.RegexTarget)
-				}
-			}
-			var allowlistRegexes []*regexp.Regexp
-			for _, a := range a.Regexes {
-				allowlistRegexes = append(allowlistRegexes, regexp.MustCompile(a))
-			}
-			var allowlistPaths []*regexp.Regexp
-			for _, a := range a.Paths {
-				allowlistPaths = append(allowlistPaths, regexp.MustCompile(a))
-			}
-
-			allowlist := &Allowlist{
-				Description:    a.Description,
-				MatchCondition: condition,
-				Commits:        a.Commits,
-				Paths:          allowlistPaths,
-				RegexTarget:    a.RegexTarget,
-				Regexes:        allowlistRegexes,
-				StopWords:      a.StopWords,
+			allowlist, err := parseAllowlist(a)
+			if err != nil {
+				return Config{}, fmt.Errorf("%s: [[rules.allowlists]] %w", cr.RuleID, err)
 			}
 			cr.Allowlists = append(cr.Allowlists, allowlist)
 		}
 		orderedRules = append(orderedRules, cr.RuleID)
 		rulesMap[cr.RuleID] = cr
 	}
-	var allowlistRegexes []*regexp.Regexp
-	for _, a := range vc.Allowlist.Regexes {
-		allowlistRegexes = append(allowlistRegexes, regexp.MustCompile(a))
-	}
-	var allowlistPaths []*regexp.Regexp
-	for _, a := range vc.Allowlist.Paths {
-		allowlistPaths = append(allowlistPaths, regexp.MustCompile(a))
-	}
+
+	// Assemble the config.
 	c := Config{
-		Description: vc.Description,
-		Extend:      vc.Extend,
-		Rules:       rulesMap,
-		Allowlist: &Allowlist{
-			RegexTarget: vc.Allowlist.RegexTarget,
-			Regexes:     allowlistRegexes,
-			Paths:       allowlistPaths,
-			Commits:     vc.Allowlist.Commits,
-			StopWords:   vc.Allowlist.StopWords,
-		},
+		Title:        vc.Title,
+		Description:  vc.Description,
+		Extend:       vc.Extend,
+		Rules:        rulesMap,
 		Keywords:     keywords,
 		OrderedRules: orderedRules,
 	}
+	// Parse the config allowlists, including the older format for backwards compatibility.
+	if vc.AllowList != nil {
+		// TODO: Remove this in v9.
+		if len(vc.Allowlists) > 0 {
+			return Config{}, errors.New("[allowlist] is deprecated, it cannot be used alongside [[allowlists]]")
+		}
+		vc.Allowlists = append(vc.Allowlists, vc.AllowList)
+	}
+	for _, a := range vc.Allowlists {
+		allowlist, err := parseAllowlist(&a.viperRuleAllowlist)
+		if err != nil {
+			return Config{}, fmt.Errorf("[[allowlists]] %w", err)
+		}
+		// Allowlists with |targetRules| aren't added to the global list.
+		if len(a.TargetRules) > 0 {
+			for _, ruleID := range a.TargetRules {
+				// It's not possible to validate |ruleID| until after extend.
+				ruleAllowlists[ruleID] = append(ruleAllowlists[ruleID], allowlist)
+			}
+		} else {
+			c.Allowlists = append(c.Allowlists, allowlist)
+		}
+	}
 
 	if maxExtendDepth != extendDepth {
 		// disallow both usedefault and path from being set
 		if c.Extend.Path != "" && c.Extend.UseDefault {
-			logging.Fatal().Msg("unable to load config due to extend.path and extend.useDefault being set")
+			return Config{}, errors.New("unable to load config due to extend.path and extend.useDefault being set")
 		}
 		if c.Extend.UseDefault {
-			c.extendDefault()
+			if err := c.extendDefault(); err != nil {
+				return Config{}, err
+			}
 		} else if c.Extend.Path != "" {
 		} else if c.Extend.Path != "" {
-			c.extendPath()
+			if err := c.extendPath(); err != nil {
+				return Config{}, err
+			}
 		}
 	}
 
@@ -220,11 +207,68 @@ func (vc *ViperConfig) Translate() (Config, error) {
 				return Config{}, err
 			}
 		}
+
+		// Populate targeted configs.
+		for ruleID, allowlists := range ruleAllowlists {
+			rule, ok := c.Rules[ruleID]
+			if !ok {
+				return Config{}, fmt.Errorf("[[allowlists]] target rule ID '%s' does not exist", ruleID)
+			}
+			rule.Allowlists = append(rule.Allowlists, allowlists...)
+			c.Rules[ruleID] = rule
+		}
 	}
 
 	return c, nil
 }
 
+func parseAllowlist(a *viperRuleAllowlist) (*Allowlist, error) {
+	var matchCondition AllowlistMatchCondition
+	switch strings.ToUpper(a.Condition) {
+	case "AND", "&&":
+		matchCondition = AllowlistMatchAnd
+	case "", "OR", "||":
+		matchCondition = AllowlistMatchOr
+	default:
+		return nil, fmt.Errorf("unknown allowlist |condition| '%s' (expected 'and', 'or')", a.Condition)
+	}
+
+	// Validate the target.
+	regexTarget := a.RegexTarget
+	if regexTarget != "" {
+		switch regexTarget {
+		case "secret":
+			regexTarget = ""
+		case "match", "line":
+			// do nothing
+		default:
+			return nil, fmt.Errorf("unknown allowlist |regexTarget| '%s' (expected 'match', 'line')", regexTarget)
+		}
+	}
+	var allowlistRegexes []*regexp.Regexp
+	for _, a := range a.Regexes {
+		allowlistRegexes = append(allowlistRegexes, regexp.MustCompile(a))
+	}
+	var allowlistPaths []*regexp.Regexp
+	for _, a := range a.Paths {
+		allowlistPaths = append(allowlistPaths, regexp.MustCompile(a))
+	}
+
+	allowlist := &Allowlist{
+		Description:    a.Description,
+		MatchCondition: matchCondition,
+		Commits:        a.Commits,
+		Paths:          allowlistPaths,
+		RegexTarget:    regexTarget,
+		Regexes:        allowlistRegexes,
+		StopWords:      a.StopWords,
+	}
+	if err := allowlist.Validate(); err != nil {
+		return nil, err
+	}
+	return allowlist, nil
+}
+
 func (c *Config) GetOrderedRules() []Rule {
 	var orderedRules []Rule
 	for _, id := range c.OrderedRules {
@@ -235,47 +279,43 @@ func (c *Config) GetOrderedRules() []Rule {
 	return orderedRules
 }
 
-func (c *Config) extendDefault() {
+func (c *Config) extendDefault() error {
 	extendDepth++
 	viper.SetConfigType("toml")
 	if err := viper.ReadConfig(strings.NewReader(DefaultConfig)); err != nil {
-		logging.Fatal().Msgf("failed to load extended config, err: %s", err)
-		return
+		return fmt.Errorf("failed to load extended default config, err: %w", err)
 	}
 	defaultViperConfig := ViperConfig{}
 	if err := viper.Unmarshal(&defaultViperConfig); err != nil {
-		logging.Fatal().Msgf("failed to load extended config, err: %s", err)
-		return
+		return fmt.Errorf("failed to load extended default config, err: %w", err)
 	}
 	cfg, err := defaultViperConfig.Translate()
 	if err != nil {
-		logging.Fatal().Msgf("failed to load extended config, err: %s", err)
-		return
+		return fmt.Errorf("failed to load extended default config, err: %w", err)
+
 	}
 	logging.Debug().Msg("extending config with default config")
 	c.extend(cfg)
-
+	return nil
 }
 
-func (c *Config) extendPath() {
+func (c *Config) extendPath() error {
 	extendDepth++
 	viper.SetConfigFile(c.Extend.Path)
 	if err := viper.ReadInConfig(); err != nil {
-		logging.Fatal().Msgf("failed to load extended config, err: %s", err)
-		return
+		return fmt.Errorf("failed to load extended config, err: %w", err)
 	}
 	extensionViperConfig := ViperConfig{}
 	if err := viper.Unmarshal(&extensionViperConfig); err != nil {
-		logging.Fatal().Msgf("failed to load extended config, err: %s", err)
-		return
+		return fmt.Errorf("failed to load extended config, err: %w", err)
 	}
 	cfg, err := extensionViperConfig.Translate()
 	if err != nil {
-		logging.Fatal().Msgf("failed to load extended config, err: %s", err)
-		return
+		return fmt.Errorf("failed to load extended config, err: %w", err)
 	}
 	logging.Debug().Msgf("extending config with %s", c.Extend.Path)
 	c.extend(cfg)
+	return nil
 }
 
 func (c *Config) extendURL() {
@@ -351,12 +391,9 @@ func (c *Config) extend(extensionConfig Config) {
 	}
 
 	// append allowlists, not attempting to merge
-	c.Allowlist.Commits = append(c.Allowlist.Commits,
-		extensionConfig.Allowlist.Commits...)
-	c.Allowlist.Paths = append(c.Allowlist.Paths,
-		extensionConfig.Allowlist.Paths...)
-	c.Allowlist.Regexes = append(c.Allowlist.Regexes,
-		extensionConfig.Allowlist.Regexes...)
+	for _, a := range extensionConfig.Allowlists {
+		c.Allowlists = append(c.Allowlists, a)
+	}
 
 	// sort to keep extended rules in order
 	sort.Strings(c.OrderedRules)

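Reviewer note: the practical upshot of `viperGlobalAllowlist.TargetRules` above is that a global `[[allowlists]]` table can now be scoped to named rules instead of applying everywhere. A self-contained sketch of the intended usage (the inline TOML mirrors the new `valid/allowlist_global_target_rules.toml` fixture; the exact fixture contents are an assumption on my part):

    package main

    import (
    	"fmt"
    	"strings"

    	"github.com/spf13/viper"
    	"github.com/zricethezav/gitleaks/v8/config"
    )

    func main() {
    	raw := `
    [[rules]]
    id = "github-pat"
    regex = '''ghp_[0-9a-zA-Z]{36}'''

    [[allowlists]]
    targetRules = ["github-pat"]
    paths = ['''(?:^|/)@octokit/auth-token/README\.md$''']
    `
    	viper.SetConfigType("toml")
    	if err := viper.ReadConfig(strings.NewReader(raw)); err != nil {
    		panic(err)
    	}
    	var vc config.ViperConfig
    	if err := viper.Unmarshal(&vc); err != nil {
    		panic(err)
    	}
    	cfg, err := vc.Translate()
    	if err != nil {
    		panic(err)
    	}
    	// A targeted allowlist is attached to the rule, not to the global list.
    	fmt.Println(len(cfg.Rules["github-pat"].Allowlists)) // 1
    	fmt.Println(len(cfg.Allowlists))                     // 0
    }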
+ 368 - 166
config/config_test.go

@@ -2,10 +2,10 @@ package config
 
 import (
 	"fmt"
-	"github.com/google/go-cmp/cmp/cmpopts"
 	"testing"
 	"testing"
 
 
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
 	"github.com/spf13/viper"
 	"github.com/spf13/viper"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"github.com/stretchr/testify/require"
@@ -15,19 +15,218 @@ import (
 
 const configPath = "../testdata/config/"
 
+var regexComparer = func(x, y *regexp.Regexp) bool {
+	if x == nil || y == nil {
+		return x == y
+	}
+	return x.String() == y.String()
+}
+
+type translateCase struct {
+	// Configuration file basename to load, from `../testdata/config/`.
+	cfgName string
+	// Expected result.
+	cfg Config
+	// Rules to compare.
+	rules []string
+	// Error to expect.
+	wantError error
+}
+
 func TestTranslate(t *testing.T) {
-	tests := []struct {
-		// Configuration file basename to load, from `../testdata/config/`.
-		cfgName string
-		// Expected result.
-		cfg Config
-		// Rules to compare.
-		rules []string
-		// Error to expect.
-		wantError error
-	}{
+	tests := []translateCase{
+		// Valid
+		{
+			cfgName: "generic",
+			cfg: Config{
+				Title: "gitleaks config",
+				Rules: map[string]Rule{"generic-api-key": {
+					RuleID:      "generic-api-key",
+					Description: "Generic API Key",
+					Regex:       regexp.MustCompile(`(?i)(?:key|api|token|secret|client|passwd|password|auth|access)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([0-9a-z\-_.=]{10,150})(?:['|\"|\n|\r|\s|\x60|;]|$)`),
+					Entropy:     3.5,
+					Keywords:    []string{"key", "api", "token", "secret", "client", "passwd", "password", "auth", "access"},
+					Tags:        []string{},
+				}},
+			},
+		},
+		{
+			cfgName: "valid/rule_path_only",
+			cfg: Config{
+				Rules: map[string]Rule{"python-files-only": {
+					RuleID:      "python-files-only",
+					Description: "Python Files",
+					Path:        regexp.MustCompile(`.py`),
+					Keywords:    []string{},
+					Tags:        []string{},
+				}},
+			},
+		},
+		{
+			cfgName: "valid/rule_regex_escaped_character_group",
+			cfg: Config{
+				Rules: map[string]Rule{"pypi-upload-token": {
+					RuleID:      "pypi-upload-token",
+					Description: "PyPI upload token",
+					Regex:       regexp.MustCompile(`pypi-AgEIcHlwaS5vcmc[A-Za-z0-9\-_]{50,1000}`),
+					Keywords:    []string{},
+					Tags:        []string{"key", "pypi"},
+				}},
+			},
+		},
+		{
+			cfgName: "valid/rule_entropy_group",
+			cfg: Config{
+				Rules: map[string]Rule{"discord-api-key": {
+					RuleID:      "discord-api-key",
+					Description: "Discord API key",
+					Regex:       regexp.MustCompile(`(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{64})['\"]`),
+					Entropy:     3.5,
+					SecretGroup: 3,
+					Keywords:    []string{},
+					Tags:        []string{},
+				}},
+			},
+		},
+
+		// Invalid
+		{
+			cfgName:   "invalid/rule_missing_id",
+			cfg:       Config{},
+			wantError: fmt.Errorf("rule |id| is missing or empty, regex: (?i)(discord[a-z0-9_ .\\-,]{0,25})(=|>|:=|\\|\\|:|<=|=>|:).{0,5}['\\\"]([a-h0-9]{64})['\\\"]"),
+		},
+		{
+			cfgName:   "invalid/rule_no_regex_or_path",
+			cfg:       Config{},
+			wantError: fmt.Errorf("discord-api-key: both |regex| and |path| are empty, this rule will have no effect"),
+		},
+		{
+			cfgName:   "invalid/rule_bad_entropy_group",
+			cfg:       Config{},
+			wantError: fmt.Errorf("discord-api-key: invalid regex secret group 5, max regex secret group 3"),
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.cfgName, func(t *testing.T) {
+			testTranslate(t, tt)
+		})
+	}
+}
+
+func TestTranslateAllowlists(t *testing.T) {
+	tests := []translateCase{
+		// Global
+		{
+			cfgName: "valid/allowlist_global_old_compat",
+			cfg: Config{
+				Rules: map[string]Rule{},
+				Allowlists: []*Allowlist{
+					{
+						StopWords: []string{"0989c462-69c9-49fa-b7d2-30dc5c576a97"},
+					},
+				},
+			},
+		},
+		{
+			cfgName: "valid/allowlist_global_multiple",
+			cfg: Config{
+				Rules: map[string]Rule{
+					"test": {
+						RuleID:   "test",
+						Regex:    regexp.MustCompile(`token = "(.+)"`),
+						Keywords: []string{},
+						Tags:     []string{},
+					},
+				},
+				Allowlists: []*Allowlist{
+					{
+						Regexes: []*regexp.Regexp{regexp.MustCompile("^changeit$")},
+					},
+					{
+						MatchCondition: AllowlistMatchAnd,
+						Paths:          []*regexp.Regexp{regexp.MustCompile("^node_modules/.*")},
+						StopWords:      []string{"mock"},
+					},
+				},
+			},
+		},
+		{
+			cfgName: "valid/allowlist_global_target_rules",
+			cfg: Config{
+				Rules: map[string]Rule{
+					"github-app-token": {
+						RuleID:   "github-app-token",
+						Regex:    regexp.MustCompile(`(?:ghu|ghs)_[0-9a-zA-Z]{36}`),
+						Tags:     []string{},
+						Keywords: []string{},
+						Allowlists: []*Allowlist{
+							{
+								Paths: []*regexp.Regexp{regexp.MustCompile(`(?:^|/)@octokit/auth-token/README\.md$`)},
+							},
+						},
+					},
+					"github-oauth": {
+						RuleID:     "github-oauth",
+						Regex:      regexp.MustCompile(`gho_[0-9a-zA-Z]{36}`),
+						Tags:       []string{},
+						Keywords:   []string{},
+						Allowlists: nil,
+					},
+					"github-pat": {
+						RuleID:   "github-pat",
+						Regex:    regexp.MustCompile(`ghp_[0-9a-zA-Z]{36}`),
+						Tags:     []string{},
+						Keywords: []string{},
+						Allowlists: []*Allowlist{
+							{
+								Paths: []*regexp.Regexp{regexp.MustCompile(`(?:^|/)@octokit/auth-token/README\.md$`)},
+							},
+						},
+					},
+				},
+				Allowlists: []*Allowlist{
+					{
+						Regexes: []*regexp.Regexp{regexp.MustCompile(".*fake.*")},
+					},
+				},
+			},
+		},
+		{
+			cfgName: "valid/allowlist_global_regex",
+			cfg: Config{
+				Rules: map[string]Rule{},
+				Allowlists: []*Allowlist{
+					{
+						MatchCondition: AllowlistMatchOr,
+						Regexes:        []*regexp.Regexp{regexp.MustCompile("AKIALALEM.L33243OLIA")},
+					},
+				},
+			},
+		},
+		{
+			cfgName:   "invalid/allowlist_global_empty",
+			cfg:       Config{},
+			wantError: fmt.Errorf("[[allowlists]] must contain at least one check for: commits, paths, regexes, or stopwords"),
+		},
+		{
+			cfgName:   "invalid/allowlist_global_old_and_new",
+			cfg:       Config{},
+			wantError: fmt.Errorf("[allowlist] is deprecated, it cannot be used alongside [[allowlists]]"),
+		},
+		{
+			cfgName:   "invalid/allowlist_global_target_rule_id",
+			cfg:       Config{},
+			wantError: fmt.Errorf("[[allowlists]] target rule ID 'github-pat' does not exist"),
+		},
+		{
+			cfgName:   "invalid/allowlist_global_regextarget",
+			cfg:       Config{},
+			wantError: fmt.Errorf("[[allowlists]] unknown allowlist |regexTarget| 'mtach' (expected 'match', 'line')"),
+		},
+
+		// Rule
 		{
-			cfgName: "allowlist_old_compat",
+			cfgName: "valid/allowlist_rule_old_compat",
 			cfg: Config{
 				Rules: map[string]Rule{"example": {
 					RuleID:   "example",
@@ -44,23 +243,9 @@ func TestTranslate(t *testing.T) {
 			},
 		},
 		{
-			cfgName:   "allowlist_invalid_empty",
-			cfg:       Config{},
-			wantError: fmt.Errorf("example: [[rules.allowlists]] must contain at least one check for: commits, paths, regexes, or stopwords"),
-		},
-		{
-			cfgName:   "allowlist_invalid_old_and_new",
-			cfg:       Config{},
-			wantError: fmt.Errorf("example: [rules.allowlist] is deprecated, it cannot be used alongside [[rules.allowlist]]"),
-		},
-		{
-			cfgName:   "allowlist_invalid_regextarget",
-			cfg:       Config{},
-			wantError: fmt.Errorf("example: unknown allowlist |regexTarget| 'mtach' (expected 'match', 'line')"),
-		},
-		{
-			cfgName: "allow_aws_re",
+			cfgName: "valid/allowlist_rule_regex",
 			cfg: Config{
+				Title: "simple config with allowlist for aws",
 				Rules: map[string]Rule{"aws-access-key": {
 				Rules: map[string]Rule{"aws-access-key": {
 					RuleID:      "aws-access-key",
 					RuleID:      "aws-access-key",
 					Description: "AWS Access Key",
 					Description: "AWS Access Key",
@@ -77,8 +262,9 @@ func TestTranslate(t *testing.T) {
 			},
 		},
 		{
-			cfgName: "allow_commit",
+			cfgName: "valid/allowlist_rule_commit",
 			cfg: Config{
+				Title: "simple config with allowlist for a specific commit",
 				Rules: map[string]Rule{"aws-access-key": {
 				Rules: map[string]Rule{"aws-access-key": {
 					RuleID:      "aws-access-key",
 					RuleID:      "aws-access-key",
 					Description: "AWS Access Key",
 					Description: "AWS Access Key",
@@ -95,8 +281,9 @@ func TestTranslate(t *testing.T) {
 			},
 		},
 		{
-			cfgName: "allow_path",
+			cfgName: "valid/allowlist_rule_path",
 			cfg: Config{
+				Title: "simple config with allowlist for .go files",
 				Rules: map[string]Rule{"aws-access-key": {
 				Rules: map[string]Rule{"aws-access-key": {
 					RuleID:      "aws-access-key",
 					RuleID:      "aws-access-key",
 					Description: "AWS Access Key",
 					Description: "AWS Access Key",
@@ -113,36 +300,34 @@ func TestTranslate(t *testing.T) {
 			},
 		},
 		{
-			cfgName: "entropy_group",
-			cfg: Config{
-				Rules: map[string]Rule{"discord-api-key": {
-					RuleID:      "discord-api-key",
-					Description: "Discord API key",
-					Regex:       regexp.MustCompile(`(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{64})['\"]`),
-					Entropy:     3.5,
-					SecretGroup: 3,
-					Keywords:    []string{},
-					Tags:        []string{},
-				}},
-			},
-		},
-		{
-			cfgName:   "missing_id",
+			cfgName:   "invalid/allowlist_rule_empty",
 			cfg:       Config{},
-			wantError: fmt.Errorf("rule |id| is missing or empty, regex: (?i)(discord[a-z0-9_ .\\-,]{0,25})(=|>|:=|\\|\\|:|<=|=>|:).{0,5}['\\\"]([a-h0-9]{64})['\\\"]"),
+			wantError: fmt.Errorf("example: [[rules.allowlists]] must contain at least one check for: commits, paths, regexes, or stopwords"),
 		},
 		{
-			cfgName:   "no_regex_or_path",
+			cfgName:   "invalid/allowlist_rule_old_and_new",
 			cfg:       Config{},
-			wantError: fmt.Errorf("discord-api-key: both |regex| and |path| are empty, this rule will have no effect"),
+			wantError: fmt.Errorf("example: [rules.allowlist] is deprecated, it cannot be used alongside [[rules.allowlist]]"),
 		},
 		{
-			cfgName:   "bad_entropy_group",
+			cfgName:   "invalid/allowlist_rule_regextarget",
 			cfg:       Config{},
-			wantError: fmt.Errorf("discord-api-key: invalid regex secret group 5, max regex secret group 3"),
+			wantError: fmt.Errorf("example: [[rules.allowlists]] unknown allowlist |regexTarget| 'mtach' (expected 'match', 'line')"),
 		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.cfgName, func(t *testing.T) {
+			testTranslate(t, tt)
+		})
+	}
+}
+
+func TestTranslateExtend(t *testing.T) {
+	tests := []translateCase{
+		// Valid
 		{
 		{
+			cfgName: "valid/extend",
 			cfg: Config{
 			cfg: Config{
 				Rules: map[string]Rule{
 					"aws-access-key": {
 			},
 			},
 		},
 		{
+			cfgName: "valid/extend_disabled",
 			cfg: Config{
 			cfg: Config{
 				Rules: map[string]Rule{
 				Rules: map[string]Rule{
-						RuleID:      "aws-secret-key-again-again",
-						Description: "AWS Secret Key",
-						Regex:       regexp.MustCompile(`(?i)aws_(.{0,20})?=?.[\'\"0-9a-zA-Z\/+]{40}`),
-						Keywords:    []string{},
-						Tags:        []string{"key", "AWS"},
-						Allowlists: []*Allowlist{
-							{
-								MatchCondition: AllowlistMatchOr,
-								StopWords:      []string{"fake"},
-							},
-							{
-								MatchCondition: AllowlistMatchOr,
-								Commits:        []string{"abcdefg1"},
-								Paths:          []*regexp.Regexp{regexp.MustCompile(`ignore\.xaml`)},
-								Regexes:        []*regexp.Regexp{regexp.MustCompile(`foo.+bar`)},
-								RegexTarget:    "line",
-								StopWords:      []string{"example"},
-							},
-						},
+					"aws-secret-key": {
+						RuleID:   "aws-secret-key",
+						Regex:    regexp.MustCompile(`(?i)aws_(.{0,20})?=?.[\'\"0-9a-zA-Z\/+]{40}`),
+						Tags:     []string{"key", "AWS"},
+						Keywords: []string{},
 					},
-				},
-			},
-		},
-		{
-			cfgName: "extend_rule_allowlist_and",
-			cfg: Config{
-				Rules: map[string]Rule{
-					"aws-secret-key-again-again": {
-						RuleID:      "aws-secret-key-again-again",
-						Description: "AWS Secret Key",
-						Regex:       regexp.MustCompile(`(?i)aws_(.{0,20})?=?.[\'\"0-9a-zA-Z\/+]{40}`),
-						Keywords:    []string{},
-						Tags:        []string{"key", "AWS"},
-						Allowlists: []*Allowlist{
-							{
-								MatchCondition: AllowlistMatchOr,
-								StopWords:      []string{"fake"},
-							},
-							{
-								MatchCondition: AllowlistMatchAnd,
-								Commits:        []string{"abcdefg1"},
-								Paths:          []*regexp.Regexp{regexp.MustCompile(`ignore\.xaml`)},
-								Regexes:        []*regexp.Regexp{regexp.MustCompile(`foo.+bar`)},
-								RegexTarget:    "line",
-								StopWords:      []string{"example"},
-							},
-						},
+					"pypi-upload-token": {
+						RuleID:   "pypi-upload-token",
+						Regex:    regexp.MustCompile(`pypi-AgEIcHlwaS5vcmc[A-Za-z0-9\-_]{50,1000}`),
+						Tags:     []string{},
+						Keywords: []string{},
 					},
 				},
 			},
 		},
 		{
-			cfgName: "extend_empty_regexpath",
+			cfgName: "valid/extend_rule_no_regexpath",
 			cfg: Config{
 				Rules: map[string]Rule{
 					"aws-secret-key-again-again": {
@@ -247,9 +396,10 @@ func TestTranslate(t *testing.T) {
 			},
 		},
 		{
-			cfgName: "override_description",
+			cfgName: "valid/extend_rule_override_description",
 			rules:   []string{"aws-access-key"},
 			rules:   []string{"aws-access-key"},
 			cfg: Config{
 			cfg: Config{
+				Title: "override a built-in rule's description",
 				Rules: map[string]Rule{"aws-access-key": {
 				Rules: map[string]Rule{"aws-access-key": {
 					RuleID:      "aws-access-key",
 					RuleID:      "aws-access-key",
 					Description: "Puppy Doggy",
 					Description: "Puppy Doggy",
@@ -261,14 +411,15 @@ func TestTranslate(t *testing.T) {
 			},
 		},
 		{
-			cfgName: "override_entropy",
+			cfgName: "valid/extend_rule_override_path",
 			rules:   []string{"aws-access-key"},
 			rules:   []string{"aws-access-key"},
 			cfg: Config{
 			cfg: Config{
+				Title: "override a built-in rule's path",
 				Rules: map[string]Rule{"aws-access-key": {
 				Rules: map[string]Rule{"aws-access-key": {
 					RuleID:      "aws-access-key",
 					RuleID:      "aws-access-key",
 					Description: "AWS Access Key",
 					Description: "AWS Access Key",
 					Regex:       regexp.MustCompile("(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}"),
 					Regex:       regexp.MustCompile("(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}"),
-					Entropy:     999.0,
+					Path:        regexp.MustCompile("(?:puppy)"),
 					Keywords:    []string{},
 					Tags:        []string{"key", "AWS"},
 				},
@@ -276,14 +427,14 @@ func TestTranslate(t *testing.T) {
 			},
 		},
 		{
-			cfgName: "override_secret_group",
+			cfgName: "valid/extend_rule_override_regex",
 			rules:   []string{"aws-access-key"},
 			rules:   []string{"aws-access-key"},
 			cfg: Config{
 			cfg: Config{
+				Title: "override a built-in rule's regex",
 				Rules: map[string]Rule{"aws-access-key": {
 				Rules: map[string]Rule{"aws-access-key": {
 					RuleID:      "aws-access-key",
 					RuleID:      "aws-access-key",
 					Description: "AWS Access Key",
 					Description: "AWS Access Key",
-					Regex:       regexp.MustCompile("(?:a)(?:a)"),
-					SecretGroup: 2,
+					Regex:       regexp.MustCompile("(?:a)"),
 					Keywords:    []string{},
 					Tags:        []string{"key", "AWS"},
 				},
@@ -291,13 +442,15 @@ func TestTranslate(t *testing.T) {
 			},
 		},
 		{
-			cfgName: "override_regex",
+			cfgName: "valid/extend_rule_override_secret_group",
 			rules:   []string{"aws-access-key"},
 			rules:   []string{"aws-access-key"},
 			cfg: Config{
 			cfg: Config{
+				Title: "override a built-in rule's secretGroup",
 				Rules: map[string]Rule{"aws-access-key": {
 				Rules: map[string]Rule{"aws-access-key": {
 					RuleID:      "aws-access-key",
 					RuleID:      "aws-access-key",
 					Description: "AWS Access Key",
 					Description: "AWS Access Key",
-					Regex:       regexp.MustCompile("(?:a)"),
+					Regex:       regexp.MustCompile("(?:a)(?:a)"),
+					SecretGroup: 2,
 					Keywords:    []string{},
 					Tags:        []string{"key", "AWS"},
 				},
@@ -305,14 +458,15 @@ func TestTranslate(t *testing.T) {
 			},
 		},
 		{
-			cfgName: "override_path",
+			cfgName: "valid/extend_rule_override_entropy",
 			rules:   []string{"aws-access-key"},
 			rules:   []string{"aws-access-key"},
 			cfg: Config{
 			cfg: Config{
+				Title: "override a built-in rule's entropy",
 				Rules: map[string]Rule{"aws-access-key": {
 				Rules: map[string]Rule{"aws-access-key": {
 					RuleID:      "aws-access-key",
 					RuleID:      "aws-access-key",
 					Description: "AWS Access Key",
 					Description: "AWS Access Key",
 					Regex:       regexp.MustCompile("(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}"),
 					Regex:       regexp.MustCompile("(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}"),
-					Path:        regexp.MustCompile("(?:puppy)"),
+					Entropy:     999.0,
 					Keywords:    []string{},
 					Tags:        []string{"key", "AWS"},
 				},
@@ -320,97 +474,145 @@ func TestTranslate(t *testing.T) {
 			},
 		},
 		{
-			cfgName: "override_tags",
+			cfgName: "valid/extend_rule_override_keywords",
 			rules:   []string{"aws-access-key"},
 			rules:   []string{"aws-access-key"},
 			cfg: Config{
 			cfg: Config{
+				Title: "override a built-in rule's keywords",
 				Rules: map[string]Rule{"aws-access-key": {
 				Rules: map[string]Rule{"aws-access-key": {
 					RuleID:      "aws-access-key",
 					RuleID:      "aws-access-key",
 					Description: "AWS Access Key",
 					Description: "AWS Access Key",
 					Regex:       regexp.MustCompile("(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}"),
 					Regex:       regexp.MustCompile("(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}"),
-					Keywords:    []string{},
-					Tags:        []string{"key", "AWS", "puppy"},
+					Keywords:    []string{"puppy"},
+					Tags:        []string{"key", "AWS"},
 				},
 				},
 			},
 		},
 		{
-			cfgName: "override_keywords",
+			cfgName: "valid/extend_rule_override_tags",
 			rules:   []string{"aws-access-key"},
 			rules:   []string{"aws-access-key"},
 			cfg: Config{
 			cfg: Config{
+				Title: "override a built-in rule's tags",
 				Rules: map[string]Rule{"aws-access-key": {
 				Rules: map[string]Rule{"aws-access-key": {
 					RuleID:      "aws-access-key",
 					RuleID:      "aws-access-key",
 					Description: "AWS Access Key",
 					Description: "AWS Access Key",
 					Regex:       regexp.MustCompile("(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}"),
 					Regex:       regexp.MustCompile("(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}"),
-					Keywords:    []string{"puppy"},
-					Tags:        []string{"key", "AWS"},
+					Keywords:    []string{},
+					Tags:        []string{"key", "AWS", "puppy"},
 				},
 				},
 			},
 		},
 		{
-			cfgName: "extend_disabled",
+			cfgName: "valid/extend_rule_allowlist_or",
 			cfg: Config{
+				Title: "gitleaks extended 3",
 				Rules: map[string]Rule{
-					"aws-secret-key": {
-						RuleID:   "aws-secret-key",
-						Regex:    regexp.MustCompile(`(?i)aws_(.{0,20})?=?.[\'\"0-9a-zA-Z\/+]{40}`),
-						Tags:     []string{"key", "AWS"},
-						Keywords: []string{},
+					"aws-secret-key-again-again": {
+						RuleID:      "aws-secret-key-again-again",
+						Description: "AWS Secret Key",
+						Regex:       regexp.MustCompile(`(?i)aws_(.{0,20})?=?.[\'\"0-9a-zA-Z\/+]{40}`),
+						Keywords:    []string{},
+						Tags:        []string{"key", "AWS"},
+						Allowlists: []*Allowlist{
+							{
+								MatchCondition: AllowlistMatchOr,
+								StopWords:      []string{"fake"},
+							},
+							{
+								MatchCondition: AllowlistMatchOr,
+								Commits:        []string{"abcdefg1"},
+								Paths:          []*regexp.Regexp{regexp.MustCompile(`ignore\.xaml`)},
+								Regexes:        []*regexp.Regexp{regexp.MustCompile(`foo.+bar`)},
+								RegexTarget:    "line",
+								StopWords:      []string{"example"},
+							},
+						},
 					},
 					},
-						RuleID:   "pypi-upload-token",
-						Regex:    regexp.MustCompile(`pypi-AgEIcHlwaS5vcmc[A-Za-z0-9\-_]{50,1000}`),
-						Tags:     []string{},
-						Keywords: []string{},
+				},
+			},
+		},
+		{
+			cfgName: "valid/extend_rule_allowlist_and",
+			cfg: Config{
+				Title: "gitleaks extended 3",
+				Rules: map[string]Rule{
+					"aws-secret-key-again-again": {
+						RuleID:      "aws-secret-key-again-again",
+						Description: "AWS Secret Key",
+						Regex:       regexp.MustCompile(`(?i)aws_(.{0,20})?=?.[\'\"0-9a-zA-Z\/+]{40}`),
+						Keywords:    []string{},
+						Tags:        []string{"key", "AWS"},
+						Allowlists: []*Allowlist{
+							{
+								MatchCondition: AllowlistMatchOr,
+								StopWords:      []string{"fake"},
+							},
+							{
+								MatchCondition: AllowlistMatchAnd,
+								Commits:        []string{"abcdefg1"},
+								Paths:          []*regexp.Regexp{regexp.MustCompile(`ignore\.xaml`)},
+								Regexes:        []*regexp.Regexp{regexp.MustCompile(`foo.+bar`)},
+								RegexTarget:    "line",
+								StopWords:      []string{"example"},
+							},
+						},
 					},
 				},
 			},
 		},
+
+		// Invalid
 	}
 
 	for _, tt := range tests {
 		t.Run(tt.cfgName, func(t *testing.T) {
-			t.Cleanup(func() {
-				extendDepth = 0
-				viper.Reset()
-			})
+			testTranslate(t, tt)
+		})
+	}
+}
 
-			viper.AddConfigPath(configPath)
-			viper.SetConfigName(tt.cfgName)
-			viper.SetConfigType("toml")
-			err := viper.ReadInConfig()
-			require.NoError(t, err)
+func testTranslate(t *testing.T, test translateCase) {
+	t.Helper()
+	t.Cleanup(func() {
+		extendDepth = 0
+		viper.Reset()
+	})
 
-			var vc ViperConfig
-			err = viper.Unmarshal(&vc)
-			require.NoError(t, err)
-			cfg, err := vc.Translate()
-			if err != nil && !assert.EqualError(t, tt.wantError, err.Error()) {
-				return
-			}
+	viper.AddConfigPath(configPath)
+	viper.SetConfigName(test.cfgName)
+	viper.SetConfigType("toml")
+	err := viper.ReadInConfig()
+	require.NoError(t, err)
 
-			if len(tt.rules) > 0 {
-				rules := make(map[string]Rule)
-				for _, name := range tt.rules {
-					rules[name] = cfg.Rules[name]
-				}
-				cfg.Rules = rules
-			}
+	var vc ViperConfig
+	err = viper.Unmarshal(&vc)
+	require.NoError(t, err)
+	cfg, err := vc.Translate()
+	if err != nil && !assert.EqualError(t, err, test.wantError.Error()) {
+		return
+	}
 
-			var regexComparer = func(x, y *regexp.Regexp) bool {
-				if x == nil || y == nil {
-					return x == y
-				}
-				return x.String() == y.String()
-			}
-			opts := cmp.Options{
-				cmp.Comparer(regexComparer),
-				cmpopts.IgnoreUnexported(Rule{}, Allowlist{}),
-			}
-			if diff := cmp.Diff(tt.cfg.Rules, cfg.Rules, opts); diff != "" {
-				t.Errorf("%s diff: (-want +got)\n%s", tt.cfgName, diff)
-			}
-		})
+	if len(test.rules) > 0 {
+		rules := make(map[string]Rule)
+		for _, name := range test.rules {
+			rules[name] = cfg.Rules[name]
+		}
+		cfg.Rules = rules
+	}
+
+	opts := cmp.Options{
+		cmp.Comparer(regexComparer),
+		cmpopts.IgnoreUnexported(Rule{}, Allowlist{}),
+	}
+	if diff := cmp.Diff(test.cfg.Title, cfg.Title); diff != "" {
+		t.Errorf("%s diff: (-want +got)\n%s", test.cfgName, diff)
+	}
+	if diff := cmp.Diff(test.cfg.Rules, cfg.Rules, opts); diff != "" {
+		t.Errorf("%s diff: (-want +got)\n%s", test.cfgName, diff)
+	}
+	if diff := cmp.Diff(test.cfg.Allowlists, cfg.Allowlists, opts); diff != "" {
+		t.Errorf("%s diff: (-want +got)\n%s", test.cfgName, diff)
 	}
 }
 
@@ -422,12 +624,12 @@ func TestExtendedRuleKeywordsAreDowncase(t *testing.T) {
 	}{
 		{
 			name:             "Extend base rule that includes AWS keyword with new attribute",
-			cfgName:          "extend_base_rule_including_keysword_with_attribute",
+			cfgName:          "valid/extend_base_rule_including_keywords_with_attribute",
 			expectedKeywords: "aws",
 			expectedKeywords: "aws",
 		},
 		},
 		{
 		{
 			name:             "Extend base with a new rule with CMS keyword",
 			name:             "Extend base with a new rule with CMS keyword",
-			cfgName:          "extend_with_new_rule",
+			cfgName:          "valid/extend_rule_new",
 			expectedKeywords: "cms",
 			expectedKeywords: "cms",
 		},
 		},
 	}
 	}

+ 41 - 18
config/gitleaks.toml

@@ -13,23 +13,9 @@
 
 title = "gitleaks config"
 
+# TODO: change to [[allowlists]]
 [allowlist]
 description = "global allow lists"
-regexes = [
-    '''(?i)^true|false|null$''',
-    '''^(?i:a+|b+|c+|d+|e+|f+|g+|h+|i+|j+|k+|l+|m+|n+|o+|p+|q+|r+|s+|t+|u+|v+|w+|x+|y+|z+|\*+|\.+)$''',
-    '''^\$(?:\d+|{\d+})$''',
-    '''^\$(?:[A-Z_]+|[a-z_]+)$''',
-    '''^\${(?:[A-Z_]+|[a-z_]+)}$''',
-    '''^\{\{[ \t]*[\w ().|]+[ \t]*}}$''',
-    '''^\$\{\{[ \t]*(?:(?:env|github|secrets|vars)(?:\.[A-Za-z]\w+)+[\w "'&./=|]*)[ \t]*}}$''',
-    '''^%(?:[A-Z_]+|[a-z_]+)%$''',
-    '''^%[+\-# 0]?[bcdeEfFgGoOpqstTUvxX]$''',
-    '''^\{\d{0,2}}$''',
-    '''^@(?:[A-Z_]+|[a-z_]+)@$''',
-    '''^/Users/(?i)[a-z0-9]+/[\w .-/]+$''',
-    '''^/(?:bin|etc|home|opt|tmp|usr|var)/[\w ./-]+$''',
-]
 paths = [
     '''gitleaks\.toml''',
     '''(?i)\.(?:bmp|gif|jpe?g|png|svg|tiff?)$''',
@@ -56,11 +42,33 @@ paths = [
     '''verification-metadata\.xml''',
     '''Database.refactorlog''',
 ]
+regexes = [
+    '''(?i)^true|false|null$''',
+    '''^(?i:a+|b+|c+|d+|e+|f+|g+|h+|i+|j+|k+|l+|m+|n+|o+|p+|q+|r+|s+|t+|u+|v+|w+|x+|y+|z+|\*+|\.+)$''',
+    '''^\$(?:\d+|{\d+})$''',
+    '''^\$(?:[A-Z_]+|[a-z_]+)$''',
+    '''^\${(?:[A-Z_]+|[a-z_]+)}$''',
+    '''^\{\{[ \t]*[\w ().|]+[ \t]*}}$''',
+    '''^\$\{\{[ \t]*(?:(?:env|github|secrets|vars)(?:\.[A-Za-z]\w+)+[\w "'&./=|]*)[ \t]*}}$''',
+    '''^%(?:[A-Z_]+|[a-z_]+)%$''',
+    '''^%[+\-# 0]?[bcdeEfFgGoOpqstTUvxX]$''',
+    '''^\{\d{0,2}}$''',
+    '''^@(?:[A-Z_]+|[a-z_]+)@$''',
+    '''^/Users/(?i)[a-z0-9]+/[\w .-/]+$''',
+    '''^/(?:bin|etc|home|opt|tmp|usr|var)/[\w ./-]+$''',
+]
 stopwords = [
     "abcdefghijklmnopqrstuvwxyz",
     "014df517-39d1-4453-b7b3-9930c563627c",
 ]
 
+[[rules]]
+id = "1password-secret-key"
+description = "Uncovered a possible 1Password secret key, potentially compromising access to secrets in vaults."
+regex = '''\bA3-[A-Z0-9]{6}-(?:(?:[A-Z0-9]{11})|(?:[A-Z0-9]{6}-[A-Z0-9]{5}))-[A-Z0-9]{5}-[A-Z0-9]{5}-[A-Z0-9]{5}\b'''
+entropy = 3.8
+keywords = ["a3-"]
+
 [[rules]]
 id = "1password-service-account-token"
 description = "Uncovered a possible 1Password service account token, potentially compromising access to secrets in vaults."
@@ -217,6 +225,13 @@ regex = '''[\w.-]{0,50}?(?i:[\w.-]{0,50}?(?:(?-i:[Mm]eraki|MERAKI))(?:[ \t\w.-]{
 entropy = 3
 keywords = ["meraki"]
 
+[[rules]]
+id = "clickhouse-cloud-api-secret-key"
+description = "Identified a pattern that may indicate clickhouse cloud API secret key, risking unauthorized clickhouse cloud api access and data breaches on ClickHouse Cloud platforms."
+regex = '''\b(4b1d[A-Za-z0-9]{38})\b'''
+entropy = 3
+keywords = ["4b1d"]
+
 [[rules]]
 id = "clojars-api-token"
 description = "Uncovered a possible Clojars API token, risking unauthorized access to Clojure libraries and potential code manipulation."
@@ -545,7 +560,7 @@ keywords = ["freshbooks"]
 id = "gcp-api-key"
 id = "gcp-api-key"
 description = "Uncovered a GCP API key, which could lead to unauthorized access to Google Cloud services and data breaches."
 description = "Uncovered a GCP API key, which could lead to unauthorized access to Google Cloud services and data breaches."
 regex = '''\b(AIza[\w-]{35})(?:[\x60'"\s;]|\\[nr]|$)'''
 regex = '''\b(AIza[\w-]{35})(?:[\x60'"\s;]|\\[nr]|$)'''
-entropy = 3
+entropy = 4
 keywords = ["aiza"]
 keywords = ["aiza"]
 [[rules.allowlists]]
 [[rules.allowlists]]
 regexes = [
 regexes = [
@@ -589,6 +604,7 @@ regexes = [
     '''^[a-zA-Z_.-]+$''',
     '''^[a-zA-Z_.-]+$''',
 ]
 ]
 [[rules.allowlists]]
 [[rules.allowlists]]
+description = "Allowlist for Generic API Keys"
 regexTarget = "match"
 regexTarget = "match"
 regexes = [
 regexes = [
     '''(?i)(?:access(?:ibility|or)|access[_.-]?id|random[_.-]?access|api[_.-]?(?:id|name|version)|rapid|capital|[a-z0-9-]*?api[a-z0-9-]*?:jar:|author|X-MS-Exchange-Organization-Auth|Authentication-Results|(?:credentials?[_.-]?id|withCredentials)|(?:bucket|foreign|hot|idx|natural|primary|pub(?:lic)?|schema|sequence)[_.-]?key|(?:turkey)|key[_.-]?(?:alias|board|code|frame|id|length|mesh|name|pair|press(?:ed)?|ring|selector|signature|size|stone|storetype|word|up|down|left|right)|key[_.-]?vault[_.-]?(?:id|name)|keyVaultToStoreSecrets|key(?:store|tab)[_.-]?(?:file|path)|issuerkeyhash|(?-i:[DdMm]onkey|[DM]ONKEY)|keying|(?:secret)[_.-]?(?:length|name|size)|UserSecretsId|(?:csrf)[_.-]?token|(?:io\.jsonwebtoken[ \t]?:[ \t]?[\w-]+)|(?:api|credentials|token)[_.-]?(?:endpoint|ur[il])|public[_.-]?token|(?:key|token)[_.-]?file|(?-i:(?:[A-Z_]+=\n[A-Z_]+=|[a-z_]+=\n[a-z_]+=)(?:\n|\z))|(?-i:(?:[A-Z.]+=\n[A-Z.]+=|[a-z.]+=\n[a-z.]+=)(?:\n|\z)))''',
     '''(?i)(?:access(?:ibility|or)|access[_.-]?id|random[_.-]?access|api[_.-]?(?:id|name|version)|rapid|capital|[a-z0-9-]*?api[a-z0-9-]*?:jar:|author|X-MS-Exchange-Organization-Auth|Authentication-Results|(?:credentials?[_.-]?id|withCredentials)|(?:bucket|foreign|hot|idx|natural|primary|pub(?:lic)?|schema|sequence)[_.-]?key|(?:turkey)|key[_.-]?(?:alias|board|code|frame|id|length|mesh|name|pair|press(?:ed)?|ring|selector|signature|size|stone|storetype|word|up|down|left|right)|key[_.-]?vault[_.-]?(?:id|name)|keyVaultToStoreSecrets|key(?:store|tab)[_.-]?(?:file|path)|issuerkeyhash|(?-i:[DdMm]onkey|[DM]ONKEY)|keying|(?:secret)[_.-]?(?:length|name|size)|UserSecretsId|(?:csrf)[_.-]?token|(?:io\.jsonwebtoken[ \t]?:[ \t]?[\w-]+)|(?:api|credentials|token)[_.-]?(?:endpoint|ur[il])|public[_.-]?token|(?:key|token)[_.-]?file|(?-i:(?:[A-Z_]+=\n[A-Z_]+=|[a-z_]+=\n[a-z_]+=)(?:\n|\z))|(?-i:(?:[A-Z.]+=\n[A-Z.]+=|[a-z.]+=\n[a-z.]+=)(?:\n|\z)))''',
@@ -2368,7 +2384,7 @@ keywords = ["kraken"]
 [[rules]]
 id = "kubernetes-secret-yaml"
 description = "Possible Kubernetes Secret detected, posing a risk of leaking credentials/tokens from your deployments"
-regex = '''(?i)(?:\bkind:[ \t]*["']?\bsecret\b["']?(?:.|\s){0,200}?\bdata:(?:.|\s){0,100}?\s+([\w.-]+:(?:[ \t]*(?:\||>[-+]?)\s+)?[ \t]*(?:["']?[a-z0-9+/]{10,}={0,3}["']?|\{\{[ \t\w"|$:=,.-]+}}|""|''))|\bdata:(?:.|\s){0,100}?\s+([\w.-]+:(?:[ \t]*(?:\||>[-+]?)\s+)?[ \t]*(?:["']?[a-z0-9+/]{10,}={0,3}["']?|\{\{[ \t\w"|$:=,.-]+}}|""|''))(?:.|\s){0,200}?\bkind:[ \t]*["']?\bsecret\b["']?)'''
+regex = '''(?i)(?:\bkind:[ \t]*["']?\bsecret\b["']?(?s:.){0,200}?\bdata:(?s:.){0,100}?\s+([\w.-]+:(?:[ \t]*(?:\||>[-+]?)\s+)?[ \t]*(?:["']?[a-z0-9+/]{10,}={0,3}["']?|\{\{[ \t\w"|$:=,.-]+}}|""|''))|\bdata:(?s:.){0,100}?\s+([\w.-]+:(?:[ \t]*(?:\||>[-+]?)\s+)?[ \t]*(?:["']?[a-z0-9+/]{10,}={0,3}["']?|\{\{[ \t\w"|$:=,.-]+}}|""|''))(?s:.){0,200}?\bkind:[ \t]*["']?\bsecret\b["']?)'''
 path = '''(?i)\.ya?ml$'''
 path = '''(?i)\.ya?ml$'''
 keywords = ["secret"]
 keywords = ["secret"]
 [[rules.allowlists]]
 [[rules.allowlists]]
@@ -2378,7 +2394,7 @@ regexes = [
 [[rules.allowlists]]
 regexTarget = "match"
 regexes = [
-    '''(kind:(?:.|\s)+\n---\n(?:.|\s)+\bdata:|data:(?:.|\s)+\n---\n(?:.|\s)+\bkind:)''',
+    '''(kind:(?s:.)+\n---\n(?s:.)+\bdata:|data:(?s:.)+\n---\n(?s:.)+\bkind:)''',
 ]
 
 [[rules]]
@@ -2620,6 +2636,13 @@ regex = '''\b(sha256~[\w-]{43})(?:[^\w-]|\z)'''
 entropy = 3.5
 keywords = ["sha256~"]
 
+[[rules]]
+id = "perplexity-api-key"
+description = "Detected a Perplexity API key, which could lead to unauthorized access to Perplexity AI services and data exposure."
+regex = '''\b(pplx-[a-zA-Z0-9]{48})(?:[\x60'"\s;]|\\[nr]|$|\b)'''
+entropy = 4
+keywords = ["pplx-"]
+
 [[rules]]
 id = "pkcs12-file"
 description = "Found a PKCS #12 file, which commonly contain bundled private keys."

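Reviewer note: a quick, self-contained check of the new perplexity-api-key pattern. The token below is a fabricated 48-character placeholder, not a real credential, and this only exercises the regex; the detector additionally applies the `entropy = 4` gate before reporting:

    package main

    import (
    	"fmt"
    	"regexp"
    	"strings"
    )

    func main() {
    	re := regexp.MustCompile(`\b(pplx-[a-zA-Z0-9]{48})(?:[\x60'"\s;]|\\[nr]|$|\b)`)
    	// 16 repetitions of "Ab1" give the 48 alphanumeric filler characters.
    	sample := `PERPLEXITY_API_KEY="pplx-` + strings.Repeat("Ab1", 16) + `"`
    	fmt.Println(re.FindStringSubmatch(sample)[1]) // pplx-Ab1Ab1...Ab1
    }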
+ 34 - 0
detect/codec/ascii.go

@@ -0,0 +1,34 @@
+package codec
+
+var printableASCII [256]bool
+
+func init() {
+	for b := 0; b < len(printableASCII); b++ {
+		if '\x08' < b && b < '\x7f' {
+			printableASCII[b] = true
+		}
+	}
+}
+
+// isPrintableASCII returns true if all bytes are printable ASCII
+func isPrintableASCII(b []byte) bool {
+	for _, c := range b {
+		if !printableASCII[c] {
+			return false
+		}
+	}
+
+	return true
+}
+
+// hasByte can be used to check if a string has at least one of the provided
+// bytes. Note: make sure byteset is long enough to handle the largest byte in
+// the string.
+func hasByte(data string, byteset []bool) bool {
+	for i := 0; i < len(data); i++ {
+		if byteset[data[i]] {
+			return true
+		}
+	}
+	return false
+}

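Reviewer note: the `'\x08' < b && b < '\x7f'` predicate above means bytes 0x09 through 0x7E inclusive count as printable, so tabs, newlines, and carriage returns pass the check (which lets multi-line decoded values survive). A tiny standalone sketch of the same table:

    package main

    import "fmt"

    func main() {
    	// Same rule as detect/codec/ascii.go: bytes 0x09..0x7e are "printable".
    	var printable [256]bool
    	for b := 0; b < len(printable); b++ {
    		printable[b] = '\x08' < b && b < '\x7f'
    	}
    	fmt.Println(printable['a'], printable['\n'])  // true true
    	fmt.Println(printable[0x00], printable[0x7f]) // false false
    }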
+ 39 - 0
detect/codec/base64.go

@@ -0,0 +1,39 @@
+package codec
+
+import (
+	"encoding/base64"
+)
+
+// likelyBase64Chars is a set of characters that you would expect to find at
+// least one of in base64 encoded data. This risks missing about 1% of
+// base64 encoded data that doesn't contain these characters, but gives you
+// the performance gain of not trying to decode a lot of long symbols in code.
+var likelyBase64Chars = make([]bool, 256)
+
+func init() {
+	for _, c := range `0123456789+/-_` {
+		likelyBase64Chars[c] = true
+	}
+}
+
+// decodeBase64 decodes base64 encoded printable ASCII characters
+func decodeBase64(encodedValue string) string {
+	// Exit early if it doesn't seem like base64
+	if !hasByte(encodedValue, likelyBase64Chars) {
+		return ""
+	}
+
+	// Try standard base64 decoding
+	decodedValue, err := base64.StdEncoding.DecodeString(encodedValue)
+	if err == nil && isPrintableASCII(decodedValue) {
+		return string(decodedValue)
+	}
+
+	// Try base64url decoding
+	decodedValue, err = base64.RawURLEncoding.DecodeString(encodedValue)
+	if err == nil && isPrintableASCII(decodedValue) {
+		return string(decodedValue)
+	}
+
+	return ""
+}

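Reviewer note: the fallback order in `decodeBase64` matters because the two alphabets overlap. A standalone sketch of the same std-then-URL-safe cascade (the real helper also rejects any result that is not printable ASCII):

    package main

    import (
    	"encoding/base64"
    	"fmt"
    )

    func main() {
    	decode := func(s string) string {
    		if v, err := base64.StdEncoding.DecodeString(s); err == nil {
    			return string(v) // padded, standard alphabet
    		}
    		if v, err := base64.RawURLEncoding.DecodeString(s); err == nil {
    			return string(v) // unpadded, '-' and '_' alphabet
    		}
    		return ""
    	}
    	fmt.Println(decode("Z2l0bGVha3M+PmZpbmRzLXNlY3JldHM=")) // gitleaks>>finds-secrets
    	fmt.Println(decode("Z2l0bGVha3M-PmZpbmRzLXNlY3JldHM"))  // gitleaks>>finds-secrets
    }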
+ 105 - 0
detect/codec/decoder.go

@@ -0,0 +1,105 @@
+package codec
+
+import (
+	"bytes"
+
+	"github.com/zricethezav/gitleaks/v8/logging"
+)
+
+// Decoder decodes various types of data in place
+type Decoder struct {
+	decodedMap map[string]string
+}
+
+// NewDecoder creates a default decoder struct
+func NewDecoder() *Decoder {
+	return &Decoder{
+		decodedMap: make(map[string]string),
+	}
+}
+
+// Decode returns the data with the values decoded in place along with the
+// encoded segment meta data for the next pass of decoding
+func (d *Decoder) Decode(data string, predecessors []*EncodedSegment) (string, []*EncodedSegment) {
+	segments := d.findEncodedSegments(data, predecessors)
+
+	if len(segments) > 0 {
+		result := bytes.NewBuffer(make([]byte, 0, len(data)))
+		encodedStart := 0
+		for _, segment := range segments {
+			result.WriteString(data[encodedStart:segment.encoded.start])
+			result.WriteString(segment.decodedValue)
+			encodedStart = segment.encoded.end
+		}
+
+		result.WriteString(data[encodedStart:])
+		return result.String(), segments
+	}
+
+	return data, segments
+}
+
+// findEncodedSegments finds the encoded segments in the data
+func (d *Decoder) findEncodedSegments(data string, predecessors []*EncodedSegment) []*EncodedSegment {
+	if len(data) == 0 {
+		return []*EncodedSegment{}
+	}
+
+	decodedShift := 0
+	encodingMatches := findEncodingMatches(data)
+	segments := make([]*EncodedSegment, 0, len(encodingMatches))
+	for _, m := range encodingMatches {
+		encodedValue := data[m.start:m.end]
+		decodedValue, alreadyDecoded := d.decodedMap[encodedValue]
+
+		if !alreadyDecoded {
+			decodedValue = m.encoding.decode(encodedValue)
+			d.decodedMap[encodedValue] = decodedValue
+		}
+
+		if len(decodedValue) == 0 {
+			continue
+		}
+
+		segment := &EncodedSegment{
+			predecessors: predecessors,
+			original:     toOriginal(predecessors, m.startEnd),
+			encoded:      m.startEnd,
+			decoded: startEnd{
+				m.start + decodedShift,
+				m.start + decodedShift + len(decodedValue),
+			},
+			decodedValue: decodedValue,
+			encodings:    m.encoding.kind,
+			depth:        1,
+		}
+
+		// Shift decoded start and ends based on size changes
+		decodedShift += len(decodedValue) - len(encodedValue)
+
+		// Adjust depth and encoding if applicable
+		if len(segment.predecessors) != 0 {
+			// Set the depth based on the predecessors' depth in the previous pass
+			segment.depth = 1 + segment.predecessors[0].depth
+			// Adjust encodings
+			for _, p := range segment.predecessors {
+				if segment.encoded.overlaps(p.decoded) {
+					segment.encodings |= p.encodings
+				}
+			}
+		}
+
+		segments = append(segments, segment)
+		logging.Debug().
+			Str("decoder", m.encoding.kind.String()).
+			Msgf(
+				"segment found: original=%s pos=%s: %q -> %q",
+				segment.original,
+				segment.encoded,
+				encodedValue,
+				segment.decodedValue,
+			)
+	}
+
+	return segments
+}

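Reviewer note: `Decode` is designed to be called in a loop; each pass rewrites decoded values in place and hands back segments that seed the next pass (the `fullDecode` helper in the test below does exactly this). A standalone sketch, using a made-up value:

    package main

    import (
    	"fmt"

    	"github.com/zricethezav/gitleaks/v8/detect/codec"
    )

    func main() {
    	d := codec.NewDecoder()
    	data := "c2VjcmV0PSJodW50ZXIyIg==" // base64 of secret="hunter2" (fake value)
    	segments := []*codec.EncodedSegment{}
    	for {
    		data, segments = d.Decode(data, segments)
    		if len(segments) == 0 {
    			break
    		}
    	}
    	fmt.Println(data) // secret="hunter2"
    }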
+ 44 - 7
detect/decoder_test.go → detect/codec/decoder_test.go

@@ -1,9 +1,10 @@
-package detect
+package codec
 
 import (
-	"testing"
-
+	"encoding/hex"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/assert"
+	"net/url"
+	"testing"
 )
 
 func TestDecode(t *testing.T) {
@@ -66,8 +67,8 @@ func TestDecode(t *testing.T) {
 		},
 		{
 			name:     "b64-url-safe: hyphen url b64",
-			chunk:    `dHJ1ZmZsZWhvZz4-ZmluZHMtc2VjcmV0cw`,
-			expected: `trufflehog>>finds-secrets`,
+			chunk:    `Z2l0bGVha3M-PmZpbmRzLXNlY3JldHM`,
+			expected: `gitleaks>>finds-secrets`,
 		},
 		{
 			name:     "b64-url-safe: underscore url b64",
@@ -79,13 +80,49 @@ func TestDecode(t *testing.T) {
 			chunk:    `a3d3fa7c2bb99e469ba55e5834ce79ee4853a8a3`,
 			expected: `a3d3fa7c2bb99e469ba55e5834ce79ee4853a8a3`,
 		},
+		{
+			name:     "url encoded value",
+			chunk:    `secret%3D%22q%24%21%40%23%24%25%5E%26%2A%28%20asdf%22`,
+			expected: `secret="q$!@#$%^&*( asdf"`,
+		},
+		{
+			name:     "hex encoded value",
+			chunk:    `secret="466973684D617048756E6B79212121363334"`,
+			expected: `secret="FishMapHunky!!!634"`,
+		},
 	}
 
 	decoder := NewDecoder()
+	fullDecode := func(data string) string {
+		segments := []*EncodedSegment{}
+		for {
+			data, segments = decoder.Decode(data, segments)
+			if len(segments) == 0 {
+				return data
+			}
+		}
+	}
+
+	// Test value decoding
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.expected, fullDecode(tt.chunk))
+		})
+	}
+
+	// Percent encode the values to test percent decoding
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			encodedChunk := url.PathEscape(tt.chunk)
+			assert.Equal(t, tt.expected, fullDecode(encodedChunk))
+		})
+	}
+
+	// Hex encode the values to test hex decoding
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			decoded, _ := decoder.decode(tt.chunk, []EncodedSegment{})
-			assert.Equal(t, tt.expected, decoded)
+			encodedChunk := hex.EncodeToString([]byte(tt.chunk))
+			assert.Equal(t, tt.expected, fullDecode(encodedChunk))
 		})
 	}
 }

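Reviewer note: re-running every case through `url.PathEscape` and `hex.EncodeToString` is what gives this table coverage of nested encodings, since each `Decode` pass peels one layer. A small illustration of the kind of nesting involved (values are made up; presumably the decoder needs two passes here, hex then base64):

    package main

    import (
    	"encoding/base64"
    	"encoding/hex"
    	"fmt"
    )

    func main() {
    	inner := base64.StdEncoding.EncodeToString([]byte(`secret="hunter2"`))
    	outer := hex.EncodeToString([]byte(inner)) // hex wrapping base64
    	fmt.Println(outer)
    }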
+ 153 - 0
detect/codec/encodings.go

@@ -0,0 +1,153 @@
+package codec
+
+import (
+	"fmt"
+	"math"
+	"regexp"
+	"strings"
+)
+
+var (
+	// encodingsRe is a regex built by combining all the encoding patterns
+	// into named capture groups so that a single pass can detect multiple
+	// encodings
+	encodingsRe *regexp.Regexp
+	// encodings contains all the encoding configurations for the detector.
+	// Order matters: it sets each encoding's precedence. More specific
+	// encodings, and encodings that only partially encode a value (e.g.
+	// percent), should rank above encodings that encode the whole string
+	// (e.g. base64). When two encoding matches overlap, the decoder uses
+	// this order to decide which encoding waits until the next pass.
+	encodings = []*encoding{
+		&encoding{
+			kind:    percentKind,
+			pattern: `%[0-9A-Fa-f]{2}(?:.*%[0-9A-Fa-f]{2})?`,
+			decode:  decodePercent,
+		},
+		&encoding{
+			kind:    hexKind,
+			pattern: `[0-9A-Fa-f]{32,}`,
+			decode:  decodeHex,
+		},
+		&encoding{
+			kind:    base64Kind,
+			pattern: `[\w\/+-]{16,}={0,2}`,
+			decode:  decodeBase64,
+		},
+	}
+)
+
+// encodingNames is used to map the encodingKinds to their name
+var encodingNames = []string{
+	"percent",
+	"hex",
+	"base64",
+}
+
+// encodingKind can be or'd together to capture all of the unique encodings
+// that were present in a segment
+type encodingKind int
+
+var (
+	// make sure these go up by powers of 2
+	percentKind = encodingKind(1)
+	hexKind     = encodingKind(2)
+	base64Kind  = encodingKind(4)
+)
+
+func (e encodingKind) String() string {
+	i := int(math.Log2(float64(e)))
+	if i >= len(encodingNames) {
+		return ""
+	}
+	return encodingNames[i]
+}
+
+// kinds returns a list of encodingKinds combined in this one
+func (e encodingKind) kinds() []encodingKind {
+	kinds := []encodingKind{}
+
+	for i := 0; i < len(encodingNames); i++ {
+		if kind := int(e) & int(math.Pow(2, float64(i))); kind != 0 {
+			kinds = append(kinds, encodingKind(kind))
+		}
+	}
+
+	return kinds
+}
+
+// encodingMatch represents a match of an encoding in the text
+type encodingMatch struct {
+	encoding *encoding
+	startEnd
+}
+
+// encoding represent a type of coding supported by the decoder.
+type encoding struct {
+	// the kind of decoding (e.g. base64, etc)
+	kind encodingKind
+	// the regex pattern that matches the encoding format
+	pattern string
+	// take the match and return the decoded value
+	decode func(string) string
+	// determine which encoding should win out when two overlap
+	precedence int
+}
+
+func init() {
+	count := len(encodings)
+	namedPatterns := make([]string, count)
+	for i, encoding := range encodings {
+		encoding.precedence = count - i
+		namedPatterns[i] = fmt.Sprintf(
+			"(?P<%s>%s)",
+			encoding.kind,
+			encoding.pattern,
+		)
+	}
+	encodingsRe = regexp.MustCompile(strings.Join(namedPatterns, "|"))
+}
+
+// findEncodingMatches finds as many encodings as it can for this pass
+func findEncodingMatches(data string) []encodingMatch {
+	var all []encodingMatch
+	for _, matchIndex := range encodingsRe.FindAllStringSubmatchIndex(data, -1) {
+		// Add the encodingMatch with its proper encoding
+		for i, j := 2, 0; i < len(matchIndex); i, j = i+2, j+1 {
+			if matchIndex[i] > -1 {
+				all = append(all, encodingMatch{
+					encoding: encodings[j],
+					startEnd: startEnd{
+						start: matchIndex[i],
+						end:   matchIndex[i+1],
+					},
+				})
+			}
+		}
+	}
+
+	totalMatches := len(all)
+	if totalMatches == 1 {
+		return all
+	}
+
+	// filter out lower precedence ones that overlap their neighbors
+	filtered := make([]encodingMatch, 0, len(all))
+	for i, m := range all {
+		if i > 0 {
+			prev := all[i-1]
+			if m.overlaps(prev.startEnd) && prev.encoding.precedence > m.encoding.precedence {
+				continue // skip this one
+			}
+		}
+		if i+1 < totalMatches {
+			next := all[i+1]
+			if m.overlaps(next.startEnd) && next.encoding.precedence > m.encoding.precedence {
+				continue // skip this one
+			}
+		}
+		filtered = append(filtered, m)
+	}
+
+	return filtered
+}

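Editor's note: a standalone sketch of the bitmask bookkeeping above. Because the kinds are powers of two, every encoding seen across decoding passes can be OR'd into a single encodingKind and recovered later, which is what kinds() does with its power-of-two scan. This is a toy re-declaration, not the package's API:

package main

import "fmt"

type encodingKind int

const (
	percentKind encodingKind = 1 << iota // 1
	hexKind                              // 2
	base64Kind                           // 4
)

func main() {
	// A segment that was percent-decoded in one pass and
	// base64-decoded in another carries both flags.
	seen := percentKind | base64Kind
	for _, k := range []encodingKind{percentKind, hexKind, base64Kind} {
		fmt.Printf("kind %d set: %v\n", k, seen&k != 0)
	}
	// kind 1 set: true, kind 2 set: false, kind 4 set: true
}
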
+ 60 - 0
detect/codec/hex.go

@@ -0,0 +1,60 @@
+package codec
+
+// hexMap is a precalculated map of hex nibbles
+const hexMap = "" +
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" +
+	"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" +
+	"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+
+// likelyHexChars is a set of characters that you would expect to find at
+// least one of in hex encoded data. This risks missing some hex data that
+// doesn't contain these characters, but gives you the performance gain of not
+// trying to decode a lot of long symbols in code.
+var likelyHexChars = make([]bool, 256)
+
+func init() {
+	for _, c := range `0123456789` {
+		likelyHexChars[c] = true
+	}
+}
+
+// decodeHex decodes hex data
+func decodeHex(encodedValue string) string {
+	size := len(encodedValue)
+	// hex should have two characters per byte
+	if size%2 != 0 {
+		return ""
+	}
+	if !hasByte(encodedValue, likelyHexChars) {
+		return ""
+	}
+
+	decodedValue := make([]byte, size/2)
+	for i := 0; i < size; i += 2 {
+		n1 := hexMap[encodedValue[i]]
+		n2 := hexMap[encodedValue[i+1]]
+		if n1|n2 == '\xff' {
+			return ""
+		}
+		b := byte(n1<<4 | n2)
+		if !printableASCII[b] {
+			return ""
+		}
+		decodedValue[i/2] = b
+	}
+
+	return string(decodedValue)
+}

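Editor's note: the 256-byte table maps each input byte to its nibble value, with 0xff marking non-hex characters so a single OR can reject a pair. A hand-rolled equivalent of the same idea (a sketch, not the package's code):

package main

import "fmt"

// hexNibble mirrors the hexMap lookup: 0xff flags an invalid digit.
func hexNibble(c byte) byte {
	switch {
	case c >= '0' && c <= '9':
		return c - '0'
	case c >= 'a' && c <= 'f':
		return c - 'a' + 10
	case c >= 'A' && c <= 'F':
		return c - 'A' + 10
	}
	return 0xff
}

func main() {
	in := "466973684D617048756E6B79" // "FishMapHunky"
	out := make([]byte, 0, len(in)/2)
	for i := 0; i+1 < len(in); i += 2 {
		n1, n2 := hexNibble(in[i]), hexNibble(in[i+1])
		if n1|n2 == 0xff {
			return // reject the whole value, as decodeHex does
		}
		out = append(out, n1<<4|n2)
	}
	fmt.Println(string(out)) // FishMapHunky
}
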
+ 34 - 0
detect/codec/percent.go

@@ -0,0 +1,34 @@
+package codec
+
+// decodePercent decodes percent encoded strings
+func decodePercent(encodedValue string) string {
+	encLen := len(encodedValue)
+	decodedValue := make([]byte, encLen)
+	decIndex := 0
+	encIndex := 0
+
+	for encIndex < encLen {
+		if encodedValue[encIndex] == '%' && encIndex+2 < encLen {
+			n1 := hexMap[encodedValue[encIndex+1]]
+			n2 := hexMap[encodedValue[encIndex+2]]
+			// Make sure they're hex characters
+			if n1|n2 != '\xff' {
+				b := byte(n1<<4 | n2)
+				if !printableASCII[b] {
+					return ""
+				}
+
+				decodedValue[decIndex] = b
+				encIndex += 3
+				decIndex += 1
+				continue
+			}
+		}
+
+		decodedValue[decIndex] = encodedValue[encIndex]
+		encIndex += 1
+		decIndex += 1
+	}
+
+	return string(decodedValue[:decIndex])
+}

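Editor's note: unlike net/url, which rejects any malformed escape, the loop above decodes whatever valid %XX triples it finds and copies everything else through, so partially encoded secrets still surface. For comparison:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Well-formed input: both approaches agree.
	s, _ := url.PathUnescape("key=%73%65%63%72%65%74-value")
	fmt.Println(s) // key=secret-value

	// A stray '%' makes the standard library bail out entirely,
	// which is why the decoder above hand-rolls the loop and simply
	// passes invalid escapes through unchanged.
	_, err := url.PathUnescape("100% sure: %73%65%63%72%65%74")
	fmt.Println(err != nil) // true
}
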
+ 173 - 0
detect/codec/segment.go

@@ -0,0 +1,173 @@
+package codec
+
+import (
+	"fmt"
+)
+
+// EncodedSegment represents a portion of text that is encoded in some way.
+type EncodedSegment struct {
+	// predecessors are all of the segments from the previous decoding pass
+	predecessors []*EncodedSegment
+
+	// original start/end indices before decoding
+	original startEnd
+
+	// encoded start/end indices relative to the previous decoding pass.
+	// If it's a top level segment, original and encoded will be the
+	// same.
+	encoded startEnd
+
+	// decoded start/end indices in this pass after decoding
+	decoded startEnd
+
+	// decodedValue contains the decoded string for this segment
+	decodedValue string
+
+	// encodings is the encodings that make up this segment. encodingKind
+	// can be or'd together to hold multiple encodings
+	encodings encodingKind
+
+	// depth is how many decoding passes it took to decode this segment
+	depth int
+}
+
+// Tags returns additional meta data tags related to the types of segments
+func Tags(segments []*EncodedSegment) []string {
+	// Return an empty list if we don't have any segments
+	if len(segments) == 0 {
+		return []string{}
+	}
+
+	// Since decoding is done in passes, the depth of all the segments
+	// should be the same
+	depth := segments[0].depth
+
+	// Collect the encodings from the segments
+	encodings := segments[0].encodings
+	for i := 1; i < len(segments); i++ {
+		encodings |= segments[i].encodings
+	}
+
+	kinds := encodings.kinds()
+	tags := make([]string, len(kinds)+1)
+
+	tags[len(tags)-1] = fmt.Sprintf("decode-depth:%d", depth)
+	for i, kind := range kinds {
+		tags[i] = fmt.Sprintf("decoded:%s", kind)
+	}
+
+	return tags
+}
+
+// CurrentLine returns the text from the start of the line on which the
+// segments' decoded range begins to the end of the line on which it ends.
+func CurrentLine(segments []*EncodedSegment, currentRaw string) string {
+	// Return the whole thing if no segments are provided
+	if len(segments) == 0 {
+		return currentRaw
+	}
+
+	start := 0
+	end := len(currentRaw)
+
+	// Merge the ranges together into a single decoded value
+	decoded := segments[0].decoded
+	for i := 1; i < len(segments); i++ {
+		decoded = decoded.merge(segments[i].decoded)
+	}
+
+	// Find the start of the range
+	for i := decoded.start; i > -1; i-- {
+		c := currentRaw[i]
+		if c == '\n' {
+			start = i
+			break
+		}
+	}
+
+	// Find the end of the range
+	for i := decoded.end; i < end; i++ {
+		c := currentRaw[i]
+		if c == '\n' {
+			end = i
+			break
+		}
+	}
+
+	return currentRaw[start:end]
+}
+
+// AdjustMatchIndex maps a match index from the current decode pass back to
+// its location in the original text
+func AdjustMatchIndex(segments []*EncodedSegment, matchIndex []int) []int {
+	// Don't adjust if we're not provided any segments
+	if len(segments) == 0 {
+		return matchIndex
+	}
+
+	// Map the match to the location in the original text
+	match := startEnd{matchIndex[0], matchIndex[1]}
+
+	// Map the match to its original location
+	adjusted := toOriginal(segments, match)
+
+	// Return the adjusted match index
+	return []int{
+		adjusted.start,
+		adjusted.end,
+	}
+}
+
+// SegmentsWithDecodedOverlap returns the segments whose decoded range
+// overlaps the provided start and end
+func SegmentsWithDecodedOverlap(segments []*EncodedSegment, start, end int) []*EncodedSegment {
+	se := startEnd{start, end}
+	overlaps := []*EncodedSegment{}
+
+	for _, segment := range segments {
+		if segment.decoded.overlaps(se) {
+			overlaps = append(overlaps, segment)
+		}
+	}
+
+	return overlaps
+}
+
+// toOriginal maps a start/end to its start/end in the original text
+// the provided start/end should be relative to the segment's decoded value
+func toOriginal(predecessors []*EncodedSegment, decoded startEnd) startEnd {
+	if len(predecessors) == 0 {
+		return decoded
+	}
+
+	// Map the decoded value one level up where it was encoded
+	encoded := startEnd{}
+
+	for _, p := range predecessors {
+		if !p.decoded.overlaps(decoded) {
+			continue // Not in scope
+		}
+
+		// If fully contained, return the segments original start/end
+		if p.decoded.contains(decoded) {
+			return p.original
+		}
+
+		// Map the value to be relative to the predecessor's decoded values
+		if encoded.end == 0 {
+			encoded = p.encoded.add(p.decoded.overflow(decoded))
+		} else {
+			encoded = encoded.merge(p.encoded.add(p.decoded.overflow(decoded)))
+		}
+	}
+
+	// We should only get here if the provided range wasn't inside any
+	// decoded value, which shouldn't happen in practice
+	if encoded.end == 0 {
+		return decoded
+	}
+
+	// Climb up another level
+	// (NOTE: each segment references all the predecessors)
+	return toOriginal(predecessors[0].predecessors, encoded)
+}

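Editor's note: the tags end up on a finding alongside the rule's own tags. For a value that took three passes involving percent and base64 layers, the output of Tags would look like the slice below (illustrative only; EncodedSegment's fields are unexported, so this just mimics the formatting the function produces):

package main

import "fmt"

func main() {
	kinds := []string{"percent", "base64"} // encodings seen across passes
	depth := 3                             // number of decoding passes

	tags := make([]string, len(kinds)+1)
	for i, kind := range kinds {
		tags[i] = fmt.Sprintf("decoded:%s", kind)
	}
	tags[len(tags)-1] = fmt.Sprintf("decode-depth:%d", depth)
	fmt.Println(tags) // [decoded:percent decoded:base64 decode-depth:3]
}
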
+ 57 - 0
detect/codec/start_end.go

@@ -0,0 +1,57 @@
+package codec
+
+import (
+	"fmt"
+)
+
+// startEnd represents the start and end of some data. It mainly exists as a
+// helper when referencing the values
+type startEnd struct {
+	start int
+	end   int
+}
+
+// sub subtracts the values of two startEnds
+func (s startEnd) sub(o startEnd) startEnd {
+	return startEnd{
+		s.start - o.start,
+		s.end - o.end,
+	}
+}
+
+// add adds the values of two startEnds
+func (s startEnd) add(o startEnd) startEnd {
+	return startEnd{
+		s.start + o.start,
+		s.end + o.end,
+	}
+}
+
+// overlaps returns true if two startEnds overlap
+func (s startEnd) overlaps(o startEnd) bool {
+	return o.start <= s.end && o.end >= s.start
+}
+
+// contains returns true if the other is fully contained within this one
+func (s startEnd) contains(o startEnd) bool {
+	return s.start <= o.start && o.end <= s.end
+}
+
+// overflow returns a startEnd that tells how much the other goes outside the
+// bounds of this one
+func (s startEnd) overflow(o startEnd) startEnd {
+	return s.merge(o).sub(s)
+}
+
+// merge takes two start/ends and returns a single one that encompasses both
+func (s startEnd) merge(o startEnd) startEnd {
+	return startEnd{
+		min(s.start, o.start),
+		max(s.end, o.end),
+	}
+}
+
+// String returns a string representation for clearer debugging
+func (s startEnd) String() string {
+	return fmt.Sprintf("[%d,%d]", s.start, s.end)
+}

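Editor's note: overflow and merge are the heart of toOriginal in segment.go above. A worked example with the same operations re-declared standalone (requires Go 1.21+ for the min/max builtins, which the package itself also relies on):

package main

import "fmt"

type startEnd struct{ start, end int }

func (s startEnd) sub(o startEnd) startEnd { return startEnd{s.start - o.start, s.end - o.end} }
func (s startEnd) add(o startEnd) startEnd { return startEnd{s.start + o.start, s.end + o.end} }
func (s startEnd) merge(o startEnd) startEnd {
	return startEnd{min(s.start, o.start), max(s.end, o.end)}
}
func (s startEnd) overflow(o startEnd) startEnd { return s.merge(o).sub(s) }

func main() {
	// A segment decoded to [7,27) in this pass from encoded bytes [7,35).
	decoded, encoded := startEnd{7, 27}, startEnd{7, 35}

	// A regex match at [0,27) spills 7 bytes before the decoded range;
	// overflow captures that excess, and adding it to the encoded bounds
	// recovers the match's location in the previous pass.
	match := startEnd{0, 27}
	fmt.Println(encoded.add(decoded.overflow(match))) // {0 35}
}
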
+ 0 - 306
detect/decoder.go

@@ -1,306 +0,0 @@
-package detect
-
-import (
-	"bytes"
-	"encoding/base64"
-	"fmt"
-	"regexp"
-	"unicode"
-
-	"github.com/zricethezav/gitleaks/v8/logging"
-)
-
-var b64LikelyChars [128]byte
-var b64Regexp = regexp.MustCompile(`[\w/+-]{16,}={0,3}`)
-var decoders = []func(string) ([]byte, error){
-	base64.StdEncoding.DecodeString,
-	base64.RawURLEncoding.DecodeString,
-}
-
-func init() {
-	// Basically look for anything that isn't just letters
-	for _, c := range `0123456789+/-_` {
-		b64LikelyChars[c] = 1
-	}
-}
-
-// EncodedSegment represents a portion of text that is encoded in some way.
-// `decode` supports recusive decoding and can result in "segment trees".
-// There can be multiple segments in the original text, so each can be thought
-// of as its own tree with the root being the original segment.
-type EncodedSegment struct {
-	// The parent segment in a segment tree. If nil, it is a root segment
-	parent *EncodedSegment
-
-	// Relative start/end are the bounds of the encoded value in the current pass.
-	relativeStart int
-	relativeEnd   int
-
-	// Absolute start/end refer to the bounds of the root segment in this segment
-	// tree
-	absoluteStart int
-	absoluteEnd   int
-
-	// Decoded start/end refer to the bounds of the decoded value in the current
-	// pass. These can differ from relative values because decoding can shrink
-	// or grow the size of the segment.
-	decodedStart int
-	decodedEnd   int
-
-	// This is the actual decoded content in the segment
-	decodedValue string
-
-	// This is the type of encoding
-	encoding string
-}
-
-// isChildOf inspects the bounds of two segments to determine
-// if one should be the child of another
-func (s EncodedSegment) isChildOf(parent EncodedSegment) bool {
-	return parent.decodedStart <= s.relativeStart && parent.decodedEnd >= s.relativeEnd
-}
-
-// decodedOverlaps checks if the decoded bounds of the segment overlaps a range
-func (s EncodedSegment) decodedOverlaps(start, end int) bool {
-	return start <= s.decodedEnd && end >= s.decodedStart
-}
-
-// adjustMatchIndex takes the matchIndex from the current decoding pass and
-// updates it to match the absolute matchIndex in the original text.
-func (s EncodedSegment) adjustMatchIndex(matchIndex []int) []int {
-	// The match is within the bounds of the segment so we just return
-	// the absolute start and end of the root segment.
-	if s.decodedStart <= matchIndex[0] && matchIndex[1] <= s.decodedEnd {
-		return []int{
-			s.absoluteStart,
-			s.absoluteEnd,
-		}
-	}
-
-	// Since it overlaps one side and/or the other, we're going to have to adjust
-	// and climb parents until we're either at the root or we've determined
-	// we're fully inside one of the parent segments.
-	adjustedMatchIndex := make([]int, 2)
-
-	if matchIndex[0] < s.decodedStart {
-		// It starts before the encoded segment so adjust the start to match
-		// the location before it was decoded
-		matchStartDelta := s.decodedStart - matchIndex[0]
-		adjustedMatchIndex[0] = s.relativeStart - matchStartDelta
-	} else {
-		// It starts within the encoded segment so set the bound to the
-		// relative start
-		adjustedMatchIndex[0] = s.relativeStart
-	}
-
-	if matchIndex[1] > s.decodedEnd {
-		// It ends after the encoded segment so adjust the end to match
-		// the location before it was decoded
-		matchEndDelta := matchIndex[1] - s.decodedEnd
-		adjustedMatchIndex[1] = s.relativeEnd + matchEndDelta
-	} else {
-		// It ends within the encoded segment so set the bound to the relative end
-		adjustedMatchIndex[1] = s.relativeEnd
-	}
-
-	// We're still not at a root segment so we'll need to keep on adjusting
-	if s.parent != nil {
-		return s.parent.adjustMatchIndex(adjustedMatchIndex)
-	}
-
-	return adjustedMatchIndex
-}
-
-// depth reports how many levels of decoding needed to be done (default is 1)
-func (s EncodedSegment) depth() int {
-	depth := 1
-
-	// Climb the tree and increment the depth
-	for current := &s; current.parent != nil; current = current.parent {
-		depth++
-	}
-
-	return depth
-}
-
-// tags returns additional meta data tags related to the types of segments
-func (s EncodedSegment) tags() []string {
-	return []string{
-		fmt.Sprintf("decoded:%s", s.encoding),
-		fmt.Sprintf("decode-depth:%d", s.depth()),
-	}
-}
-
-// Decoder decodes various types of data in place
-type Decoder struct {
-	decodedMap map[string]string
-}
-
-// NewDecoder creates a default decoder struct
-func NewDecoder() *Decoder {
-	return &Decoder{
-		decodedMap: make(map[string]string),
-	}
-}
-
-// decode returns the data with the values decoded in-place
-func (d *Decoder) decode(data string, parentSegments []EncodedSegment) (string, []EncodedSegment) {
-	segments := d.findEncodedSegments(data, parentSegments)
-
-	if len(segments) > 0 {
-		result := bytes.NewBuffer(make([]byte, 0, len(data)))
-
-		relativeStart := 0
-		for _, segment := range segments {
-			result.WriteString(data[relativeStart:segment.relativeStart])
-			result.WriteString(segment.decodedValue)
-			relativeStart = segment.relativeEnd
-		}
-		result.WriteString(data[relativeStart:])
-
-		return result.String(), segments
-	}
-
-	return data, segments
-}
-
-// findEncodedSegments finds the encoded segments in the data and updates the
-// segment tree for this pass
-func (d *Decoder) findEncodedSegments(data string, parentSegments []EncodedSegment) []EncodedSegment {
-	if len(data) == 0 {
-		return []EncodedSegment{}
-	}
-
-	matchIndices := b64Regexp.FindAllStringIndex(data, -1)
-	if matchIndices == nil {
-		return []EncodedSegment{}
-	}
-
-	segments := make([]EncodedSegment, 0, len(matchIndices))
-
-	// Keeps up with offsets from the text changing size as things are decoded
-	decodedShift := 0
-
-	for _, matchIndex := range matchIndices {
-		encodedValue := data[matchIndex[0]:matchIndex[1]]
-
-		if !isLikelyB64(encodedValue) {
-			d.decodedMap[encodedValue] = ""
-			continue
-		}
-
-		decodedValue, alreadyDecoded := d.decodedMap[encodedValue]
-
-		// We haven't decoded this yet, so go ahead and decode it
-		if !alreadyDecoded {
-			decodedValue = decodeValue(encodedValue)
-			d.decodedMap[encodedValue] = decodedValue
-		}
-
-		// Skip this segment because there was nothing to check
-		if len(decodedValue) == 0 {
-			continue
-		}
-
-		// Create a segment for the encoded data
-		segment := EncodedSegment{
-			relativeStart: matchIndex[0],
-			relativeEnd:   matchIndex[1],
-			absoluteStart: matchIndex[0],
-			absoluteEnd:   matchIndex[1],
-			decodedStart:  matchIndex[0] + decodedShift,
-			decodedEnd:    matchIndex[0] + decodedShift + len(decodedValue),
-			decodedValue:  decodedValue,
-			encoding:      "base64",
-		}
-
-		// Shift decoded start and ends based on size changes
-		decodedShift += len(decodedValue) - len(encodedValue)
-
-		// Adjust the absolute position of segments contained in parent segments
-		for _, parentSegment := range parentSegments {
-			if segment.isChildOf(parentSegment) {
-				segment.absoluteStart = parentSegment.absoluteStart
-				segment.absoluteEnd = parentSegment.absoluteEnd
-				segment.parent = &parentSegment
-				break
-			}
-		}
-
-		logging.Debug().Msgf("segment found: %#v", segment)
-		segments = append(segments, segment)
-	}
-
-	return segments
-}
-
-// decoders tries a list of decoders and returns the first successful one
-func decodeValue(encodedValue string) string {
-	for _, decoder := range decoders {
-		decodedValue, err := decoder(encodedValue)
-
-		if err == nil && len(decodedValue) > 0 && isASCII(decodedValue) {
-			return string(decodedValue)
-		}
-	}
-
-	return ""
-}
-
-func isASCII(b []byte) bool {
-	for i := 0; i < len(b); i++ {
-		if b[i] > unicode.MaxASCII || b[i] < '\t' {
-			return false
-		}
-	}
-
-	return true
-}
-
-// Skip a lot of method signatures and things at the risk of missing about
-// 1% of base64
-func isLikelyB64(s string) bool {
-	for _, c := range s {
-		if b64LikelyChars[c] != 0 {
-			return true
-		}
-	}
-
-	return false
-}
-
-// Find a segment where the decoded bounds overlaps a range
-func segmentWithDecodedOverlap(encodedSegments []EncodedSegment, start, end int) *EncodedSegment {
-	for _, segment := range encodedSegments {
-		if segment.decodedOverlaps(start, end) {
-			return &segment
-		}
-	}
-
-	return nil
-}
-
-func (s EncodedSegment) currentLine(currentRaw string) string {
-	start := 0
-	end := len(currentRaw)
-
-	// Find the start of the range
-	for i := s.decodedStart; i > -1; i-- {
-		c := currentRaw[i]
-		if c == '\n' {
-			start = i
-			break
-		}
-	}
-
-	// Find the end of the range
-	for i := s.decodedEnd; i < end; i++ {
-		c := currentRaw[i]
-		if c == '\n' {
-			end = i
-			break
-		}
-	}
-
-	return currentRaw[start:end]
-}

+ 183 - 148
detect/detect.go

@@ -9,8 +9,10 @@ import (
 	"strings"
 	"strings"
 	"sync"
 	"sync"
 	"sync/atomic"
 	"sync/atomic"
+	"time"
 
 
 	"github.com/zricethezav/gitleaks/v8/config"
 	"github.com/zricethezav/gitleaks/v8/config"
+	"github.com/zricethezav/gitleaks/v8/detect/codec"
 	"github.com/zricethezav/gitleaks/v8/logging"
 	"github.com/zricethezav/gitleaks/v8/logging"
 	"github.com/zricethezav/gitleaks/v8/regexp"
 	"github.com/zricethezav/gitleaks/v8/regexp"
 	"github.com/zricethezav/gitleaks/v8/report"
 	"github.com/zricethezav/gitleaks/v8/report"
@@ -25,6 +27,10 @@ import (
 const (
 const (
 	gitleaksAllowSignature = "gitleaks:allow"
 	gitleaksAllowSignature = "gitleaks:allow"
 	chunkSize              = 100 * 1_000 // 100kb
 	chunkSize              = 100 * 1_000 // 100kb
+
+	// SlowWarningThreshold is the amount of time to wait before logging that a file is slow.
+	// This is useful for identifying problematic files and tuning the allowlist.
+	SlowWarningThreshold = 5 * time.Second
 )
 )
 
 
 var (
 var (
@@ -212,26 +218,39 @@ func (d *Detector) Detect(fragment Fragment) []report.Finding {
 	}
 	d.TotalBytes.Add(uint64(len(fragment.Bytes)))
 
-	var findings []report.Finding
+	var (
+		findings []report.Finding
+		logger   = func() zerolog.Logger {
+			l := logging.With().Str("path", fragment.FilePath)
+			if fragment.CommitSHA != "" {
+				l = l.Str("commit", fragment.CommitSHA)
+			}
+			return l.Logger()
+		}()
+	)
 
 	// check if filepath is allowed
 	if fragment.FilePath != "" {
 		// is the path our config or baseline file?
-		if fragment.FilePath == d.Config.Path || (d.baselinePath != "" && fragment.FilePath == d.baselinePath) ||
-			// is the path excluded by the global allowlist?
-			(d.Config.Allowlist.PathAllowed(fragment.FilePath) || (fragment.WindowsFilePath != "" && d.Config.Allowlist.PathAllowed(fragment.WindowsFilePath))) {
+		if fragment.FilePath == d.Config.Path || (d.baselinePath != "" && fragment.FilePath == d.baselinePath) {
+			logging.Trace().Msg("skipping file: matches config or baseline path")
 			return findings
 		}
 	}
+	// check if commit or filepath is allowed.
+	if isAllowed, event := checkCommitOrPathAllowed(logger, fragment, d.Config.Allowlists); isAllowed {
+		event.Msg("skipping file: global allowlist")
+		return findings
+	}
 
 	// add newline indices for location calculation in detectRule
 	fragment.newlineIndices = newLineRegexp.FindAllStringIndex(fragment.Raw, -1)
 
 	// setup variables to handle different decoding passes
 	currentRaw := fragment.Raw
-	encodedSegments := []EncodedSegment{}
+	encodedSegments := []*codec.EncodedSegment{}
 	currentDecodeDepth := 0
-	decoder := NewDecoder()
+	decoder := codec.NewDecoder()
 
 	for {
 		// build keyword map for prefiltering rules
@@ -268,7 +287,7 @@ func (d *Detector) Detect(fragment Fragment) []report.Finding {
 		}
 
 		// decode the currentRaw for the next pass
-		currentRaw, encodedSegments = decoder.decode(currentRaw, encodedSegments)
+		currentRaw, encodedSegments = decoder.Decode(currentRaw, encodedSegments)
 
 		// stop the loop when there's nothing else to decode
 		if len(encodedSegments) == 0 {
@@ -280,7 +299,7 @@ func (d *Detector) Detect(fragment Fragment) []report.Finding {
 }
 
 // detectRule scans the given fragment for the given rule and returns a list of findings
-func (d *Detector) detectRule(fragment Fragment, currentRaw string, r config.Rule, encodedSegments []EncodedSegment) []report.Finding {
+func (d *Detector) detectRule(fragment Fragment, currentRaw string, r config.Rule, encodedSegments []*codec.EncodedSegment) []report.Finding {
 	var (
 		findings []report.Finding
 		logger   = func() zerolog.Logger {
@@ -292,46 +311,10 @@ func (d *Detector) detectRule(fragment Fragment, currentRaw string, r config.Rul
 		}()
 	)
 
-	// check if filepath or commit is allowed for this rule
-	for _, a := range r.Allowlists {
-		var (
-			isAllowed             bool
-			commitAllowed, commit = a.CommitAllowed(fragment.CommitSHA)
-			pathAllowed           = a.PathAllowed(fragment.FilePath) || (fragment.WindowsFilePath != "" && a.PathAllowed(fragment.WindowsFilePath))
-		)
-		if a.MatchCondition == config.AllowlistMatchAnd {
-			// Determine applicable checks.
-			var allowlistChecks []bool
-			if len(a.Commits) > 0 {
-				allowlistChecks = append(allowlistChecks, commitAllowed)
-			}
-			if len(a.Paths) > 0 {
-				allowlistChecks = append(allowlistChecks, pathAllowed)
-			}
-			// These will be checked later.
-			if len(a.Regexes) > 0 {
-				allowlistChecks = append(allowlistChecks, false)
-			}
-			if len(a.StopWords) > 0 {
-				allowlistChecks = append(allowlistChecks, false)
-			}
-
-			// Check if allowed.
-			isAllowed = allTrue(allowlistChecks)
-		} else {
-			isAllowed = commitAllowed || pathAllowed
-		}
-		if isAllowed {
-			event := logger.Trace().Str("condition", a.MatchCondition.String())
-			if commitAllowed {
-				event.Str("allowed-commit", commit)
-			}
-			if pathAllowed {
-				event.Bool("allowed-path", pathAllowed)
-			}
-			event.Msg("skipping file: rule allowlist")
-			return findings
-		}
+	// check if commit or file is allowed for this rule.
+	if isAllowed, event := checkCommitOrPathAllowed(logger, fragment, r.Allowlists); isAllowed {
+		event.Msg("skipping file: rule allowlist")
+		return findings
 	}
 
 	if r.Path != nil {
@@ -377,7 +360,6 @@ func (d *Detector) detectRule(fragment Fragment, currentRaw string, r config.Rul
 
 
 	// use currentRaw instead of fragment.Raw since this represents the current
 	// decoding pass on the text
-MatchLoop:
 	for _, matchIndex := range r.Regex.FindAllStringIndex(currentRaw, -1) {
 		// Extract secret from match
 		secret := strings.Trim(currentRaw[matchIndex[0]:matchIndex[1]], "\n")
@@ -389,14 +371,15 @@ MatchLoop:
 		// Check if the decoded portions of the segment overlap with the match
 		// to see if its potentially a new match
 		if len(encodedSegments) > 0 {
-			if segment := segmentWithDecodedOverlap(encodedSegments, matchIndex[0], matchIndex[1]); segment != nil {
-				matchIndex = segment.adjustMatchIndex(matchIndex)
-				metaTags = append(metaTags, segment.tags()...)
-				currentLine = segment.currentLine(currentRaw)
-			} else {
+			segments := codec.SegmentsWithDecodedOverlap(encodedSegments, matchIndex[0], matchIndex[1])
+			if len(segments) == 0 {
 				// This item has already been added to a finding
 				continue
 			}
+
+			matchIndex = codec.AdjustMatchIndex(segments, matchIndex)
+			metaTags = append(metaTags, codec.Tags(segments)...)
+			currentLine = codec.CurrentLine(segments, currentRaw)
 		} else {
 			// Fixes: https://github.com/gitleaks/gitleaks/issues/1352
 			// removes the incorrectly following line that was detected by regex expression '\n'
@@ -474,109 +457,22 @@ MatchLoop:
 			}
 		}
 
-		if d.Config.Allowlist != nil {
-			// check if the regexTarget is defined in the allowlist "regexes" entry
-			// or if the secret is in the list of stopwords
-			globalAllowlistTarget := finding.Secret
-			switch d.Config.Allowlist.RegexTarget {
-			case "match":
-				globalAllowlistTarget = finding.Match
-			case "line":
-				globalAllowlistTarget = currentLine
-			}
-			if d.Config.Allowlist.RegexAllowed(globalAllowlistTarget) {
-				logger.Trace().
-					Str("finding", globalAllowlistTarget).
-					Msg("skipping finding: global allowlist regex")
-				continue
-			} else if ok, word := d.Config.Allowlist.ContainsStopWord(finding.Secret); ok {
-				logger.Trace().
-					Str("finding", finding.Secret).
-					Str("allowed-stopword", word).
-					Msg("skipping finding: global allowlist stopword")
-				continue
-			}
+		// check if the result matches any of the global allowlists.
+		if isAllowed, event := checkFindingAllowed(logger, finding, fragment, currentLine, d.Config.Allowlists); isAllowed {
+			event.Msg("skipping finding: global allowlist")
+			continue
 		}
 
 		// check if the result matches any of the rule allowlists.
-		for _, a := range r.Allowlists {
-			allowlistTarget := finding.Secret
-			switch a.RegexTarget {
-			case "match":
-				allowlistTarget = finding.Match
-			case "line":
-				allowlistTarget = currentLine
-			}
-
-			var (
-				isAllowed              bool
-				commitAllowed          bool
-				commit                 string
-				pathAllowed            bool
-				regexAllowed           = a.RegexAllowed(allowlistTarget)
-				containsStopword, word = a.ContainsStopWord(finding.Secret)
-			)
-			// check if the secret is in the list of stopwords
-			if a.MatchCondition == config.AllowlistMatchAnd {
-				// Determine applicable checks.
-				var allowlistChecks []bool
-				if len(a.Commits) > 0 {
-					commitAllowed, commit = a.CommitAllowed(fragment.CommitSHA)
-					allowlistChecks = append(allowlistChecks, commitAllowed)
-				}
-				if len(a.Paths) > 0 {
-					pathAllowed = a.PathAllowed(fragment.FilePath) || (fragment.WindowsFilePath != "" && a.PathAllowed(fragment.WindowsFilePath))
-					allowlistChecks = append(allowlistChecks, pathAllowed)
-				}
-				if len(a.Regexes) > 0 {
-					allowlistChecks = append(allowlistChecks, regexAllowed)
-				}
-				if len(a.StopWords) > 0 {
-					allowlistChecks = append(allowlistChecks, containsStopword)
-				}
-
-				// Check if allowed.
-				isAllowed = allTrue(allowlistChecks)
-			} else {
-				isAllowed = regexAllowed || containsStopword
-			}
-
-			if isAllowed {
-				event := logger.Trace().
-					Str("finding", finding.Secret).
-					Str("condition", a.MatchCondition.String())
-				if commitAllowed {
-					event.Str("allowed-commit", commit)
-				}
-				if pathAllowed {
-					event.Bool("allowed-path", pathAllowed)
-				}
-				if regexAllowed {
-					event.Bool("allowed-regex", regexAllowed)
-				}
-				if containsStopword {
-					event.Str("allowed-stopword", word)
-				}
-				event.Msg("skipping finding: rule allowlist")
-				continue MatchLoop
-			}
+		if isAllowed, event := checkFindingAllowed(logger, finding, fragment, currentLine, r.Allowlists); isAllowed {
+			event.Msg("skipping finding: rule allowlist")
+			continue
 		}
 		findings = append(findings, finding)
 	}
 	return findings
 }
 
-func allTrue(bools []bool) bool {
-	allMatch := true
-	for _, check := range bools {
-		if !check {
-			allMatch = false
-			break
-		}
-	}
-	return allMatch
-}
-
 // AddFinding synchronously adds a finding to the findings slice
 func (d *Detector) AddFinding(finding report.Finding) {
 	globalFingerprint := fmt.Sprintf("%s:%s:%d", finding.File, finding.RuleID, finding.StartLine)
@@ -627,3 +523,142 @@ func (d *Detector) Findings() []report.Finding {
 func (d *Detector) addCommit(commit string) {
 	d.commitMap[commit] = true
 }
+
+// checkCommitOrPathAllowed evaluates |fragment| against all provided |allowlists|.
+//
+// If the match condition is "OR", only commit and path are checked.
+// Otherwise, if regexes or stopwords are defined this will fail.
+func checkCommitOrPathAllowed(
+	logger zerolog.Logger,
+	fragment Fragment,
+	allowlists []*config.Allowlist,
+) (bool, *zerolog.Event) {
+	if fragment.FilePath == "" && fragment.CommitSHA == "" {
+		return false, nil
+	}
+
+	for _, a := range allowlists {
+		var (
+			isAllowed        bool
+			allowlistChecks  []bool
+			commitAllowed, _ = a.CommitAllowed(fragment.CommitSHA)
+			pathAllowed      = a.PathAllowed(fragment.FilePath) || (fragment.WindowsFilePath != "" && a.PathAllowed(fragment.WindowsFilePath))
+		)
+		// If the condition is "AND" we need to check all conditions.
+		if a.MatchCondition == config.AllowlistMatchAnd {
+			if len(a.Commits) > 0 {
+				allowlistChecks = append(allowlistChecks, commitAllowed)
+			}
+			if len(a.Paths) > 0 {
+				allowlistChecks = append(allowlistChecks, pathAllowed)
+			}
+			// These will be checked later.
+			if len(a.Regexes) > 0 {
+				continue
+			}
+			if len(a.StopWords) > 0 {
+				continue
+			}
+
+			isAllowed = allTrue(allowlistChecks)
+		} else {
+			isAllowed = commitAllowed || pathAllowed
+		}
+		if isAllowed {
+			event := logger.Trace().Str("condition", a.MatchCondition.String())
+			if commitAllowed {
+				event.Bool("allowed-commit", commitAllowed)
+			}
+			if pathAllowed {
+				event.Bool("allowed-path", pathAllowed)
+			}
+			return true, event
+		}
+	}
+	return false, nil
+}
+
+// checkFindingAllowed evaluates |finding| against all provided |allowlists|.
+//
+// If the match condition is "OR", only regex and stopwords are run. (Commit and path should be handled separately).
+// Otherwise, all conditions are checked.
+//
+// TODO: The method signature is awkward. I can't think of a better way to log helpful info.
+func checkFindingAllowed(
+	logger zerolog.Logger,
+	finding report.Finding,
+	fragment Fragment,
+	currentLine string,
+	allowlists []*config.Allowlist,
+) (bool, *zerolog.Event) {
+	for _, a := range allowlists {
+		allowlistTarget := finding.Secret
+		switch a.RegexTarget {
+		case "match":
+			allowlistTarget = finding.Match
+		case "line":
+			allowlistTarget = currentLine
+		}
+
+		var (
+			checks                 []bool
+			isAllowed              bool
+			commitAllowed          bool
+			commit                 string
+			pathAllowed            bool
+			regexAllowed           = a.RegexAllowed(allowlistTarget)
+			containsStopword, word = a.ContainsStopWord(finding.Secret)
+		)
+		// If the condition is "AND" we need to check all conditions.
+		if a.MatchCondition == config.AllowlistMatchAnd {
+			// Determine applicable checks.
+			if len(a.Commits) > 0 {
+				commitAllowed, commit = a.CommitAllowed(fragment.CommitSHA)
+				checks = append(checks, commitAllowed)
+			}
+			if len(a.Paths) > 0 {
+				pathAllowed = a.PathAllowed(fragment.FilePath) || (fragment.WindowsFilePath != "" && a.PathAllowed(fragment.WindowsFilePath))
+				checks = append(checks, pathAllowed)
+			}
+			if len(a.Regexes) > 0 {
+				checks = append(checks, regexAllowed)
+			}
+			if len(a.StopWords) > 0 {
+				checks = append(checks, containsStopword)
+			}
+
+			isAllowed = allTrue(checks)
+		} else {
+			isAllowed = regexAllowed || containsStopword
+		}
+
+		if isAllowed {
+			event := logger.Trace().
+				Str("finding", finding.Secret).
+				Str("condition", a.MatchCondition.String())
+			if commitAllowed {
+				event.Str("allowed-commit", commit)
+			}
+			if pathAllowed {
+				event.Bool("allowed-path", pathAllowed)
+			}
+			if regexAllowed {
+				event.Bool("allowed-regex", regexAllowed)
+			}
+			if containsStopword {
+				event.Str("allowed-stopword", word)
+			}
+			return true, event
+		}
+	}
+	return false, nil
+}
+
+func allTrue(bools []bool) bool {
+	for _, check := range bools {
+		if !check {
+			return false
+		}
+	}
+	return true
+}

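Editor's note: a condensed illustration of the two match conditions, using the same allTrue helper as the diff. Under "AND", only the conditions an allowlist actually defines are collected, and all of them must hit; under the default "OR", any single hit suppresses the finding. The boolean values below are stand-ins for the real checks:

package main

import "fmt"

func allTrue(bools []bool) bool {
	for _, check := range bools {
		if !check {
			return false
		}
	}
	return true
}

func main() {
	pathAllowed, regexAllowed := true, false

	// condition = "AND": the path matched but the regex did not,
	// so the finding is kept.
	fmt.Println(allTrue([]bool{pathAllowed, regexAllowed})) // false

	// default "OR": one hit is enough to skip the finding.
	fmt.Println(pathAllowed || regexAllowed) // true
}
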
+ 381 - 158
detect/detect_test.go

@@ -17,6 +17,7 @@ import (
 
 	"github.com/zricethezav/gitleaks/v8/cmd/scm"
 	"github.com/zricethezav/gitleaks/v8/config"
+	"github.com/zricethezav/gitleaks/v8/detect/codec"
 	"github.com/zricethezav/gitleaks/v8/logging"
 	"github.com/zricethezav/gitleaks/v8/regexp"
 	"github.com/zricethezav/gitleaks/v8/report"
@@ -26,7 +27,7 @@ import (
 const maxDecodeDepth = 8
 const configPath = "../testdata/config/"
 const repoBasePath = "../testdata/repos/"
-const b64TestValues = `
+const encodedTestValues = `
 # Decoded
 -----BEGIN PRIVATE KEY-----
 135f/bRUBHrbHqLY/xS3I7Oth+8rgG+0tBwfMcbk05Sgxq6QUzSYIQAop+WvsTwk2sR+C38g0Mnb
@@ -44,16 +45,48 @@ eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwiY29uZmlnIjoiVzJ
 c21hbGwtc2VjcmV0
 
 # This tests how it handles when the match bounds go outside the decoded value
-secret=ZGVjb2RlZC1zZWNyZXQtdmFsdWU=
+secret=ZGVjb2RlZC1zZWNyZXQtdmFsdWUwMA==
 # The above encoded again
 c2VjcmV0PVpHVmpiMlJsWkMxelpXTnlaWFF0ZG1Gc2RXVT0=
 
 # Confirm you can ignore on the decoded value
 password="bFJxQkstejVrZjQtcGxlYXNlLWlnbm9yZS1tZS1YLVhJSk0yUGRkdw=="
+
+# This tests that it can do hex encoded data
+secret=6465636F6465642D7365637265742D76616C756576484558
+
+# This tests that it can do percent encoded data
+## partial encoded data
+secret=decoded-%73%65%63%72%65%74-valuev2
+## scattered encoded
+secret=%64%65coded-%73%65%63%72%65%74-valuev3
+
+# Test multiple levels of encoding where the source is a partial encoding
+# it is important that the bounds of the predecessors are properly
+# considered
+## single percent encoding in the middle of multi layer b64
+c2VjcmV0PVpHVmpiMl%4AsWkMxelpXTnlaWFF0ZG1Gc2RXVjJOQT09
+## single percent encoding at the beginning of hex
+secret%3d6465636F6465642D7365637265742D76616C75657635
+## multiple percent encodings in a single layer base64
+secret=ZGVjb2%52lZC1zZWNyZXQtdm%46sdWV4ODY=  # ends in x86
+## base64 encoded partially percent encoded value
+secret=ZGVjb2RlZC0lNzMlNjUlNjMlNzIlNjUlNzQtdmFsdWU=
+## one of the lines above that went through... a lot
+## and there's surrounding text around it
+Look at this value: %4EjMzMjU2NkE2MzZENTYzMDUwNTY3MDQ4%4eTY2RDcwNjk0RDY5NTUzMTRENkQ3ODYx%25%34%65TE3QTQ2MzY1NzZDNjQ0RjY1NTY3MDU5NTU1ODUyNkI2MjUzNTUzMDRFNkU0RTZCNTYzMTU1MzkwQQ== # isn't it crazy?
+## Multi percent encode two random characters close to the bounds of the base64
+## encoded data to make sure that the bounds are still correctly calculated
+secret=ZG%25%32%35%25%33%32%25%33%35%25%32%35%25%33%33%25%33%35%25%32%35%25%33%33%25%33%36%25%32%35%25%33%32%25%33%35%25%32%35%25%33%33%25%33%36%25%32%35%25%33%36%25%33%31%25%32%35%25%33%32%25%33%35%25%32%35%25%33%33%25%33%36%25%32%35%25%33%33%25%33%322RlZC1zZWNyZXQtd%25%36%64%25%34%36%25%37%33dWU=
+## Similar to the above but also touching the edge of the base64
+secret=%25%35%61%25%34%37%25%35%36jb2RlZC1zZWNyZXQtdmFsdWU%25%32%35%25%33%33%25%36%34
+## Similar to the above but also touching and overlapping the base64
+secret%3D%25%35%61%25%34%37%25%35%36jb2RlZC1zZWNyZXQtdmFsdWU%25%32%35%25%33%33%25%36%34
 `
 
 func TestDetect(t *testing.T) {
-	tests := []struct {
+	logging.Logger = logging.Logger.Level(zerolog.TraceLevel)
+	tests := map[string]struct {
 		cfgName      string
 		baselinePath string
 		fragment     Fragment
@@ -65,14 +98,15 @@ func TestDetect(t *testing.T) {
 		expectedFindings []report.Finding
 		wantError        error
 	}{
-		{
+		// General
+		"valid allow comment (1)": {
 			cfgName: "simple",
 			cfgName: "simple",
 			fragment: Fragment{
 			fragment: Fragment{
 				Raw:      `awsToken := \"AKIALALEMEL33243OKIA\ // gitleaks:allow"`,
 				Raw:      `awsToken := \"AKIALALEMEL33243OKIA\ // gitleaks:allow"`,
 				FilePath: "tmp.go",
 				FilePath: "tmp.go",
 			},
 			},
 		},
 		},
-		{
+		"valid allow comment (2)": {
 			cfgName: "simple",
 			cfgName: "simple",
 			fragment: Fragment{
 			fragment: Fragment{
 				Raw: `awsToken := \
 				Raw: `awsToken := \
@@ -83,7 +117,7 @@ func TestDetect(t *testing.T) {
 				FilePath: "tmp.go",
 				FilePath: "tmp.go",
 			},
 			},
 		},
 		},
-		{
+		"invalid allow comment": {
 			cfgName: "simple",
 			cfgName: "simple",
 			fragment: Fragment{
 			fragment: Fragment{
 				Raw: `awsToken := \"AKIALALEMEL33243OKIA\"
 				Raw: `awsToken := \"AKIALALEMEL33243OKIA\"
@@ -110,30 +144,7 @@ func TestDetect(t *testing.T) {
 				},
 			},
 		},
-		{
-			cfgName: "escaped_character_group",
-			fragment: Fragment{
-				Raw:      `pypi-AgEIcHlwaS5vcmcAAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAAB`,
-				FilePath: "tmp.go",
-			},
-			expectedFindings: []report.Finding{
-				{
-					Description: "PyPI upload token",
-					Secret:      "pypi-AgEIcHlwaS5vcmcAAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAAB",
-					Match:       "pypi-AgEIcHlwaS5vcmcAAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAAB",
-					Line:        `pypi-AgEIcHlwaS5vcmcAAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAA-AAAAAAAAAAB`,
-					File:        "tmp.go",
-					RuleID:      "pypi-upload-token",
-					Tags:        []string{"key", "pypi"},
-					StartLine:   0,
-					EndLine:     0,
-					StartColumn: 1,
-					EndColumn:   86,
-					Entropy:     1.9606875,
-				},
-			},
-		},
-		{
+		"detect finding - aws": {
 			cfgName: "simple",
 			cfgName: "simple",
 			fragment: Fragment{
 			fragment: Fragment{
 				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
 				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
@@ -141,22 +152,22 @@ func TestDetect(t *testing.T) {
 			},
 			expectedFindings: []report.Finding{
 				{
+					RuleID:      "aws-access-key",
 					Description: "AWS Access Key",
 					Description: "AWS Access Key",
-					Secret:      "AKIALALEMEL33243OLIA",
-					Match:       "AKIALALEMEL33243OLIA",
-					Line:        `awsToken := \"AKIALALEMEL33243OLIA\"`,
 					File:        "tmp.go",
 					File:        "tmp.go",
-					RuleID:      "aws-access-key",
-					Tags:        []string{"key", "AWS"},
+					Line:        `awsToken := \"AKIALALEMEL33243OLIA\"`,
+					Match:       "AKIALALEMEL33243OLIA",
+					Secret:      "AKIALALEMEL33243OLIA",
+					Entropy:     3.0841837,
 					StartLine:   0,
 					EndLine:     0,
 					StartColumn: 15,
 					EndColumn:   34,
-					Entropy:     3.0841837,
+					Tags:        []string{"key", "AWS"},
 				},
 			},
 		},
-		{
+		"detect finding - sidekiq env var": {
 			cfgName: "simple",
 			cfgName: "simple",
 			fragment: Fragment{
 			fragment: Fragment{
 				Raw:      `export BUNDLE_ENTERPRISE__CONTRIBSYS__COM=cafebabe:deadbeef;`,
 				Raw:      `export BUNDLE_ENTERPRISE__CONTRIBSYS__COM=cafebabe:deadbeef;`,
@@ -164,22 +175,22 @@ func TestDetect(t *testing.T) {
 			},
 			expectedFindings: []report.Finding{
 				{
+					RuleID:      "sidekiq-secret",
 					Description: "Sidekiq Secret",
 					Description: "Sidekiq Secret",
+					File:        "tmp.sh",
+					Line:        `export BUNDLE_ENTERPRISE__CONTRIBSYS__COM=cafebabe:deadbeef;`,
 					Match:       "BUNDLE_ENTERPRISE__CONTRIBSYS__COM=cafebabe:deadbeef;",
 					Secret:      "cafebabe:deadbeef",
-					Line:        `export BUNDLE_ENTERPRISE__CONTRIBSYS__COM=cafebabe:deadbeef;`,
-					File:        "tmp.sh",
-					RuleID:      "sidekiq-secret",
-					Tags:        []string{},
 					Entropy:     2.6098502,
 					StartLine:   0,
 					EndLine:     0,
 					StartColumn: 8,
 					EndColumn:   60,
+					Tags:        []string{},
 				},
 			},
 		},
-		{
+		"detect finding - sidekiq env var, semicolon": {
 			cfgName: "simple",
 			cfgName: "simple",
 			fragment: Fragment{
 			fragment: Fragment{
 				Raw:      `echo hello1; export BUNDLE_ENTERPRISE__CONTRIBSYS__COM="cafebabe:deadbeef" && echo hello2`,
 				Raw:      `echo hello1; export BUNDLE_ENTERPRISE__CONTRIBSYS__COM="cafebabe:deadbeef" && echo hello2`,
@@ -187,22 +198,22 @@ func TestDetect(t *testing.T) {
 			},
 			expectedFindings: []report.Finding{
 				{
+					RuleID:      "sidekiq-secret",
 					Description: "Sidekiq Secret",
-					Match:       "BUNDLE_ENTERPRISE__CONTRIBSYS__COM=\"cafebabe:deadbeef\"",
-					Secret:      "cafebabe:deadbeef",
 					File:        "tmp.sh",
 					File:        "tmp.sh",
 					Line:        `echo hello1; export BUNDLE_ENTERPRISE__CONTRIBSYS__COM="cafebabe:deadbeef" && echo hello2`,
 					Line:        `echo hello1; export BUNDLE_ENTERPRISE__CONTRIBSYS__COM="cafebabe:deadbeef" && echo hello2`,
-					RuleID:      "sidekiq-secret",
-					Tags:        []string{},
+					Match:       "BUNDLE_ENTERPRISE__CONTRIBSYS__COM=\"cafebabe:deadbeef\"",
+					Secret:      "cafebabe:deadbeef",
 					Entropy:     2.6098502,
 					StartLine:   0,
 					EndLine:     0,
 					StartColumn: 21,
 					EndColumn:   74,
+					Tags:        []string{},
 				},
 			},
 		},
-		{
+		"detect finding - sidekiq url": {
 			cfgName: "simple",
 			cfgName: "simple",
 			fragment: Fragment{
 			fragment: Fragment{
 				Raw:      `url = "http://cafeb4b3:d3adb33f@enterprise.contribsys.com:80/path?param1=true&param2=false#heading1"`,
 				Raw:      `url = "http://cafeb4b3:d3adb33f@enterprise.contribsys.com:80/path?param1=true&param2=false#heading1"`,
@@ -210,153 +221,212 @@ func TestDetect(t *testing.T) {
 			},
 			expectedFindings: []report.Finding{
 				{
+					RuleID:      "sidekiq-sensitive-url",
 					Description: "Sidekiq Sensitive URL",
-					Match:       "http://cafeb4b3:d3adb33f@enterprise.contribsys.com:",
-					Secret:      "cafeb4b3:d3adb33f",
 					File:        "tmp.sh",
 					File:        "tmp.sh",
 					Line:        `url = "http://cafeb4b3:d3adb33f@enterprise.contribsys.com:80/path?param1=true&param2=false#heading1"`,
 					Line:        `url = "http://cafeb4b3:d3adb33f@enterprise.contribsys.com:80/path?param1=true&param2=false#heading1"`,
-					RuleID:      "sidekiq-sensitive-url",
-					Tags:        []string{},
+					Match:       "http://cafeb4b3:d3adb33f@enterprise.contribsys.com:",
+					Secret:      "cafeb4b3:d3adb33f",
 					Entropy:     2.984234,
 					StartLine:   0,
 					EndLine:     0,
 					StartColumn: 8,
 					EndColumn:   58,
+					Tags:        []string{},
 				},
 			},
 		},
-		{
-			cfgName: "allow_aws_re",
+		"ignore finding - our config file": {
+			cfgName: "simple",
 			fragment: Fragment{
 			fragment: Fragment{
 				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
 				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
-				FilePath: "tmp.go",
+				FilePath: filepath.Join(configPath, "simple.toml"),
 			},
 			},
 		},
 		},
-		{
-			cfgName: "allow_path",
+		"ignore finding - doesn't match path": {
+			cfgName: "generic_with_py_path",
 			fragment: Fragment{
 			fragment: Fragment{
-				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
 				FilePath: "tmp.go",
 				FilePath: "tmp.go",
 			},
 			},
 		},
 		},
-		{
-			cfgName: "allow_commit",
-			fragment: Fragment{
-				Raw:       `awsToken := \"AKIALALEMEL33243OLIA\"`,
-				FilePath:  "tmp.go",
-				CommitSHA: "allowthiscommit",
-			},
-		},
-		{
-			cfgName: "entropy_group",
+		"detect finding - matches path,regex,entropy": {
+			cfgName: "generic_with_py_path",
 			fragment: Fragment{
 			fragment: Fragment{
 				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
 				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
-				FilePath: "tmp.go",
+				FilePath: "tmp.py",
 			},
 			},
 			expectedFindings: []report.Finding{
 			expectedFindings: []report.Finding{
 				{
 				{
-					Description: "Discord API key",
-					Match:       "Discord_Public_Key = \"e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5\"",
-					Secret:      "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5",
+					RuleID:      "generic-api-key",
+					Description: "Generic API Key",
+					File:        "tmp.py",
 					Line:        `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
 					Line:        `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
-					File:        "tmp.go",
-					RuleID:      "discord-api-key",
-					Tags:        []string{},
+					Match:       "Key = \"e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5\"",
+					Secret:      "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5",
 					Entropy:     3.7906237,
 					Entropy:     3.7906237,
 					StartLine:   0,
 					StartLine:   0,
 					EndLine:     0,
 					EndLine:     0,
-					StartColumn: 7,
+					StartColumn: 22,
 					EndColumn:   93,
 					EndColumn:   93,
+					Tags:        []string{},
 				},
 				},
 			},
 			},
 		},
 		},
-		{
+		"ignore finding - allowlist regex": {
 			cfgName: "generic_with_py_path",
 			cfgName: "generic_with_py_path",
+			fragment: Fragment{
+				Raw:      `const Discord_Public_Key = "load2523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+				FilePath: "tmp.py",
+			},
+		},
+
+		// Rule
+		"rule - ignore path": {
+			cfgName:      "valid/rule_path_only",
+			baselinePath: ".baseline.json",
 			fragment: Fragment{
 			fragment: Fragment{
 				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
 				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
-				FilePath: "tmp.go",
+				FilePath: ".baseline.json",
 			},
 			},
 		},
 		},
-		{
-			cfgName: "generic_with_py_path",
+		"rule - detect path ": {
+			cfgName: "valid/rule_path_only",
 			fragment: Fragment{
 			fragment: Fragment{
 				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
 				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
 				FilePath: "tmp.py",
 				FilePath: "tmp.py",
 			},
 			},
 			expectedFindings: []report.Finding{
 			expectedFindings: []report.Finding{
 				{
 				{
-					Description: "Generic API Key",
-					Match:       "Key = \"e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5\"",
-					Secret:      "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5",
-					Line:        `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+					Description: "Python Files",
+					Match:       "file detected: tmp.py",
 					File:        "tmp.py",
 					File:        "tmp.py",
-					RuleID:      "generic-api-key",
+					RuleID:      "python-files-only",
 					Tags:        []string{},
 					Tags:        []string{},
+				},
+			},
+		},
+		"rule - match based on entropy": {
+			cfgName: "valid/rule_entropy_group",
+			fragment: Fragment{
+				Raw: `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"
+//const Discord_Public_Key = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+`,
+				FilePath: "tmp.go",
+			},
+			expectedFindings: []report.Finding{
+				{
+					RuleID:      "discord-api-key",
+					Description: "Discord API key",
+					File:        "tmp.go",
+					Line:        `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+					Match:       "Discord_Public_Key = \"e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5\"",
+					Secret:      "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5",
 					Entropy:     3.7906237,
 					Entropy:     3.7906237,
 					StartLine:   0,
 					StartLine:   0,
 					EndLine:     0,
 					EndLine:     0,
-					StartColumn: 22,
+					StartColumn: 7,
 					EndColumn:   93,
 					EndColumn:   93,
+					Tags:        []string{},
 				},
 				},
 			},
 			},
 		},
 		},
-		{
-			cfgName: "path_only",
+
+		// Allowlists
+		"global allowlist - ignore regex": {
+			cfgName: "valid/allowlist_global_regex",
 			fragment: Fragment{
 			fragment: Fragment{
-				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
-				FilePath: "tmp.py",
+				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath: "tmp.go",
+			},
+		},
+		"global allowlist - detect, doesn't match all conditions": {
+			cfgName: "valid/allowlist_global_multiple",
+			fragment: Fragment{
+				Raw: `
+const token = "mockSecret";
+// const token = "changeit";`,
+				FilePath: "config.txt",
 			},
 			},
 			expectedFindings: []report.Finding{
 			expectedFindings: []report.Finding{
 				{
 				{
-					Description: "Python Files",
-					Match:       "file detected: tmp.py",
-					File:        "tmp.py",
-					RuleID:      "python-files-only",
+					RuleID:      "test",
+					File:        "config.txt",
+					Line:        "\nconst token = \"mockSecret\";",
+					Match:       `token = "mockSecret"`,
+					Secret:      "mockSecret",
+					Entropy:     2.9219282,
+					StartLine:   1,
+					EndLine:     1,
+					StartColumn: 8,
+					EndColumn:   27,
 					Tags:        []string{},
 				},
 			},
 		},
-		{
-			cfgName: "bad_entropy_group",
+		"global allowlist - ignore, matches all conditions": {
+			cfgName: "valid/allowlist_global_multiple",
 			fragment: Fragment{
-				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
-				FilePath: "tmp.go",
+				Raw:      `token := "mockSecret";`,
+				FilePath: "node_modules/config.txt",
 			},
-			wantError: fmt.Errorf("discord-api-key: invalid regex secret group 5, max regex secret group 3"),
 		},
-		{
-			cfgName: "simple",
+		"global allowlist - detect path, doesn't match all conditions": {
+			cfgName: "valid/allowlist_global_multiple",
 			fragment: Fragment{
-				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
-				FilePath: filepath.Join(configPath, "simple.toml"),
+				Raw:      `var token = "fakeSecret";`,
+				FilePath: "node_modules/config.txt",
+			},
+			expectedFindings: []report.Finding{
+				{
+					RuleID:      "test",
+					File:        "node_modules/config.txt",
+					Line:        "var token = \"fakeSecret\";",
+					Match:       `token = "fakeSecret"`,
+					Secret:      "fakeSecret",
+					Entropy:     2.8464394,
+					StartLine:   0,
+					EndLine:     0,
+					StartColumn: 5,
+					EndColumn:   24,
+					Tags:        []string{},
+				},
 			},
 		},
-		{
-			cfgName: "allow_global_aws_re",
+		"allowlist - ignore commit": {
+			cfgName: "valid/allowlist_rule_commit",
+			fragment: Fragment{
+				Raw:       `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath:  "tmp.go",
+				CommitSHA: "allowthiscommit",
+			},
+		},
+		"allowlist - ignore path": {
+			cfgName: "valid/allowlist_rule_path",
 			fragment: Fragment{
 				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
 				FilePath: "tmp.go",
 			},
 		},
-		{
-			cfgName: "generic_with_py_path",
+		"allowlist - ignore path when extending": {
+			cfgName: "valid/allowlist_rule_extend_default",
 			fragment: Fragment{
-				Raw:      `const Discord_Public_Key = "load2523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
-				FilePath: "tmp.py",
+				Raw:      `token = "aebfab88-7596-481d-82e8-c60c8f7de0c0"`,
+				FilePath: "path/to/your/problematic/file.js",
 			},
 		},
-		{
-			cfgName:      "path_only",
-			baselinePath: ".baseline.json",
+		"allowlist - ignore regex": {
+			cfgName: "valid/allowlist_rule_regex",
 			fragment: Fragment{
-				Raw:      `const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
-				FilePath: ".baseline.json",
+				Raw:      `awsToken := \"AKIALALEMEL33243OLIA\"`,
+				FilePath: "tmp.go",
 			},
 		},
-		{
-			cfgName: "base64_encoded",
+		// Decoding
+		"detect encoded": {
+			cfgName: "encoded",
 			fragment: Fragment{
-				Raw:      b64TestValues,
+				Raw:      encodedTestValues,
 				FilePath: "tmp.go",
 			},
 			expectedFindings: []report.Finding{
@@ -402,6 +472,90 @@ func TestDetect(t *testing.T) {
 					EndColumn:   207,
 					Entropy:     5.350665,
 				},
+				{ // Encoded Small secret at the end to make sure it's picked up by the decoding
+					Description: "Small Secret",
+					Secret:      "small-secret",
+					Match:       "small-secret",
+					File:        "tmp.go",
+					Line:        "\nc21hbGwtc2VjcmV0",
+					RuleID:      "small-secret",
+					Tags:        []string{"small", "secret", "decoded:base64", "decode-depth:1"},
+					StartLine:   15,
+					EndLine:     15,
+					StartColumn: 2,
+					EndColumn:   17,
+					Entropy:     3.0849626,
+				},
+				{ // Secret where the decoded match goes outside the encoded value
+					Description: "Overlapping",
+					Secret:      "decoded-secret-value00",
+					Match:       "secret=decoded-secret-value00",
+					File:        "tmp.go",
+					Line:        "\nsecret=ZGVjb2RlZC1zZWNyZXQtdmFsdWUwMA==",
+					RuleID:      "overlapping",
+					Tags:        []string{"overlapping", "decoded:base64", "decode-depth:1"},
+					StartLine:   18,
+					EndLine:     18,
+					StartColumn: 2,
+					EndColumn:   40,
+					Entropy:     3.4428623,
+				},
+				{ // This just confirms that with no allowlist the pattern is detected (i.e. the regex is good)
+					Description: "Make sure this would be detected with no allowlist",
+					Secret:      "lRqBK-z5kf4-please-ignore-me-X-XIJM2Pddw",
+					Match:       "password=\"lRqBK-z5kf4-please-ignore-me-X-XIJM2Pddw\"",
+					File:        "tmp.go",
+					Line:        "\npassword=\"bFJxQkstejVrZjQtcGxlYXNlLWlnbm9yZS1tZS1YLVhJSk0yUGRkdw==\"",
+					RuleID:      "decoded-password-dont-ignore",
+					Tags:        []string{"decode-ignore", "decoded:base64", "decode-depth:1"},
+					StartLine:   23,
+					EndLine:     23,
+					StartColumn: 2,
+					EndColumn:   68,
+					Entropy:     4.5841837,
+				},
+				{ // Hex encoded data check
+					Description: "Overlapping",
+					Secret:      "decoded-secret-valuevHEX",
+					Match:       "secret=decoded-secret-valuevHEX",
+					File:        "tmp.go",
+					Line:        "\nsecret=6465636F6465642D7365637265742D76616C756576484558",
+					RuleID:      "overlapping",
+					Tags:        []string{"overlapping", "decoded:hex", "decode-depth:1"},
+					StartLine:   26,
+					EndLine:     26,
+					StartColumn: 2,
+					EndColumn:   56,
+					Entropy:     3.6531072,
+				},
+				{ // handle partial encoded percent data
+					Description: "Overlapping",
+					Secret:      "decoded-secret-valuev2",
+					Match:       "secret=decoded-secret-valuev2",
+					File:        "tmp.go",
+					Line:        "\nsecret=decoded-%73%65%63%72%65%74-valuev2",
+					RuleID:      "overlapping",
+					Tags:        []string{"overlapping", "decoded:percent", "decode-depth:1"},
+					StartLine:   30,
+					EndLine:     30,
+					StartColumn: 2,
+					EndColumn:   42,
+					Entropy:     3.4428623,
+				},
+				{ // handle partial encoded percent data
+					Description: "Overlapping",
+					Secret:      "decoded-secret-valuev3",
+					Match:       "secret=decoded-secret-valuev3",
+					File:        "tmp.go",
+					Line:        "\nsecret=%64%65coded-%73%65%63%72%65%74-valuev3",
+					RuleID:      "overlapping",
+					Tags:        []string{"overlapping", "decoded:percent", "decode-depth:1"},
+					StartLine:   32,
+					EndLine:     32,
+					StartColumn: 2,
+					EndColumn:   46,
+					Entropy:     3.4428623,
+				},
 				{ // Encoded AWS config with an access key id inside a JWT
 					Description: "AWS IAM Unique Identifier",
 					Secret:      "ASIAIOSFODNN7LXM10JI",
@@ -430,68 +584,138 @@ func TestDetect(t *testing.T) {
 					EndColumn:   344,
 					Entropy:     4.721928,
 				},
-				{ // Encoded Small secret at the end to make sure it's picked up by the decoding
-					Description: "Small Secret",
-					Secret:      "small-secret",
-					Match:       "small-secret",
+				{ // Secret where the decoded match goes outside the encoded value and then encoded again
+					Description: "Overlapping",
+					Secret:      "decoded-secret-value",
+					Match:       "secret=decoded-secret-value",
 					File:        "tmp.go",
-					Line:        "\nc21hbGwtc2VjcmV0",
-					RuleID:      "small-secret",
-					Tags:        []string{"small", "secret", "decoded:base64", "decode-depth:1"},
-					StartLine:   15,
-					EndLine:     15,
+					Line:        "\nc2VjcmV0PVpHVmpiMlJsWkMxelpXTnlaWFF0ZG1Gc2RXVT0=",
+					RuleID:      "overlapping",
+					Tags:        []string{"overlapping", "decoded:base64", "decode-depth:2"},
+					StartLine:   20,
+					EndLine:     20,
 					StartColumn: 2,
-					EndColumn:   17,
-					Entropy:     3.0849626,
+					EndColumn:   49,
+					Entropy:     3.3037016,
 				},
-				{ // Secret where the decoded match goes outside the encoded value
+				{ // handle encodings that touch each other
+					Description: "Overlapping",
+					Secret:      "decoded-secret-valuev5",
+					Match:       "secret=decoded-secret-valuev5",
+					File:        "tmp.go",
+					Line:        "\nsecret%3d6465636F6465642D7365637265742D76616C75657635",
+					RuleID:      "overlapping",
+					Tags:        []string{"overlapping", "decoded:percent", "decoded:hex", "decode-depth:2"},
+					StartLine:   40,
+					EndLine:     40,
+					StartColumn: 2,
+					EndColumn:   54,
+					Entropy:     3.4428623,
+				},
+				{ // handle partial encoded percent data
+					Description: "Overlapping",
+					Secret:      "decoded-secret-valuev4",
+					Match:       "secret=decoded-secret-valuev4",
+					File:        "tmp.go",
+					Line:        "\nc2VjcmV0PVpHVmpiMl%4AsWkMxelpXTnlaWFF0ZG1Gc2RXVjJOQT09",
+					RuleID:      "overlapping",
+					Tags:        []string{"overlapping", "decoded:percent", "decoded:base64", "decode-depth:3"},
+					StartLine:   38,
+					EndLine:     38,
+					StartColumn: 2,
+					EndColumn:   55,
+					Entropy:     3.4428623,
+				},
+				{ // multiple percent encodings in a single layer base64
+					Description: "Overlapping",
+					Secret:      "decoded-secret-valuex86",
+					Match:       "secret=decoded-secret-valuex86",
+					File:        "tmp.go",
+					Line:        "\nsecret=ZGVjb2%52lZC1zZWNyZXQtdm%46sdWV4ODY=  # ends in x86",
+					RuleID:      "overlapping",
+					Tags:        []string{"overlapping", "decoded:percent", "decoded:base64", "decode-depth:2"},
+					StartLine:   42,
+					EndLine:     42,
+					StartColumn: 2,
+					EndColumn:   44,
+					Entropy:     3.6381476,
+				},
+				{ // base64 encoded partially percent encoded value
 					Description: "Overlapping",
 					Secret:      "decoded-secret-value",
 					Match:       "secret=decoded-secret-value",
 					File:        "tmp.go",
-					Line:        "\nsecret=ZGVjb2RlZC1zZWNyZXQtdmFsdWU=",
+					Line:        "\nsecret=ZGVjb2RlZC0lNzMlNjUlNjMlNzIlNjUlNzQtdmFsdWU=",
 					RuleID:      "overlapping",
-					Tags:        []string{"overlapping", "decoded:base64", "decode-depth:1"},
-					StartLine:   18,
-					EndLine:     18,
+					Tags:        []string{"overlapping", "decoded:percent", "decoded:base64", "decode-depth:2"},
+					StartLine:   44,
+					EndLine:     44,
 					StartColumn: 2,
-					EndColumn:   36,
+					EndColumn:   52,
 					Entropy:     3.3037016,
 				},
-				{ // Secret where the decoded match goes outside the encoded value and then encoded again
+				{ // one of the lines above that went through... a lot
 					Description: "Overlapping",
 					Secret:      "decoded-secret-value",
 					Match:       "secret=decoded-secret-value",
 					File:        "tmp.go",
-					Line:        "\nc2VjcmV0PVpHVmpiMlJsWkMxelpXTnlaWFF0ZG1Gc2RXVT0=",
+					Line:        "\nLook at this value: %4EjMzMjU2NkE2MzZENTYzMDUwNTY3MDQ4%4eTY2RDcwNjk0RDY5NTUzMTRENkQ3ODYx%25%34%65TE3QTQ2MzY1NzZDNjQ0RjY1NTY3MDU5NTU1ODUyNkI2MjUzNTUzMDRFNkU0RTZCNTYzMTU1MzkwQQ== # isn't it crazy?",
 					RuleID:      "overlapping",
-					Tags:        []string{"overlapping", "decoded:base64", "decode-depth:2"},
-					StartLine:   20,
-					EndLine:     20,
+					Tags:        []string{"overlapping", "decoded:percent", "decoded:hex", "decoded:base64", "decode-depth:7"},
+					StartLine:   47,
+					EndLine:     47,
+					StartColumn: 22,
+					EndColumn:   177,
+					Entropy:     3.3037016,
+				},
+				{ // Multi percent encode two random characters close to the bounds of the base64
+					Description: "Overlapping",
+					Secret:      "decoded-secret-value",
+					Match:       "secret=decoded-secret-value",
+					File:        "tmp.go",
+					Line:        "\nsecret=ZG%25%32%35%25%33%32%25%33%35%25%32%35%25%33%33%25%33%35%25%32%35%25%33%33%25%33%36%25%32%35%25%33%32%25%33%35%25%32%35%25%33%33%25%33%36%25%32%35%25%33%36%25%33%31%25%32%35%25%33%32%25%33%35%25%32%35%25%33%33%25%33%36%25%32%35%25%33%33%25%33%322RlZC1zZWNyZXQtd%25%36%64%25%34%36%25%37%33dWU=",
+					RuleID:      "overlapping",
+					Tags:        []string{"overlapping", "decoded:percent", "decoded:base64", "decode-depth:5"},
+					StartLine:   50,
+					EndLine:     50,
 					StartColumn: 2,
-					EndColumn:   49,
+					EndColumn:   300,
 					Entropy:     3.3037016,
 				},
-				{ // This just confirms that with no allowlist the pattern is detected (i.e. the regex is good)
-					Description: "Make sure this would be detected with no allowlist",
-					Secret:      "lRqBK-z5kf4-please-ignore-me-X-XIJM2Pddw",
-					Match:       "password=\"lRqBK-z5kf4-please-ignore-me-X-XIJM2Pddw\"",
+				{ // Similar to the above, but also touching the edge of the base64
+					Description: "Overlapping",
+					Secret:      "decoded-secret-value",
+					Match:       "secret=decoded-secret-value",
 					File:        "tmp.go",
-					Line:        "\npassword=\"bFJxQkstejVrZjQtcGxlYXNlLWlnbm9yZS1tZS1YLVhJSk0yUGRkdw==\"",
-					RuleID:      "decoded-password-dont-ignore",
-					Tags:        []string{"decode-ignore", "decoded:base64", "decode-depth:1"},
-					StartLine:   23,
-					EndLine:     23,
+					Line:        "\nsecret=%25%35%61%25%34%37%25%35%36jb2RlZC1zZWNyZXQtdmFsdWU%25%32%35%25%33%33%25%36%34",
+					RuleID:      "overlapping",
+					Tags:        []string{"overlapping", "decoded:percent", "decoded:base64", "decode-depth:4"},
+					StartLine:   52,
+					EndLine:     52,
 					StartColumn: 2,
-					EndColumn:   68,
-					Entropy:     4.5841837,
+					EndColumn:   86,
+					Entropy:     3.3037016,
+				},
+				{ // Similar to the above, but also touching and overlapping the base64
+					Description: "Overlapping",
+					Secret:      "decoded-secret-value",
+					Match:       "secret=decoded-secret-value",
+					File:        "tmp.go",
+					Line:        "\nsecret%3D%25%35%61%25%34%37%25%35%36jb2RlZC1zZWNyZXQtdmFsdWU%25%32%35%25%33%33%25%36%34",
+					RuleID:      "overlapping",
+					Tags:        []string{"overlapping", "decoded:percent", "decoded:base64", "decode-depth:4"},
+					StartLine:   54,
+					EndLine:     54,
+					StartColumn: 2,
+					EndColumn:   88,
+					Entropy:     3.3037016,
 				},
 			},
 		},
 	}
 
-	for _, tt := range tests {
-		t.Run(fmt.Sprintf("%s - %s", tt.cfgName, tt.fragment.FilePath), func(t *testing.T) {
+	for name, tt := range tests {
+		t.Run(name, func(t *testing.T) {
 			viper.Reset()
 			viper.AddConfigPath(configPath)
 			viper.SetConfigName(tt.cfgName)
@@ -851,7 +1075,7 @@ func TestFromFiles(t *testing.T) {
 			require.NoError(t, err)
 
 			detector.FollowSymlinks = true
-			paths, err := sources.DirectoryTargets(tt.source, detector.Sema, true, cfg.Allowlist.PathAllowed)
+			paths, err := sources.DirectoryTargets(tt.source, detector.Sema, true, cfg.Allowlists)
 			require.NoError(t, err)
 
 			findings, err := detector.DetectFiles(paths)
@@ -925,7 +1149,7 @@ func TestDetectWithSymlinks(t *testing.T) {
 		cfg, _ := vc.Translate()
 		detector := NewDetector(cfg)
 		detector.FollowSymlinks = true
-		paths, err := sources.DirectoryTargets(tt.source, detector.Sema, true, cfg.Allowlist.PathAllowed)
+		paths, err := sources.DirectoryTargets(tt.source, detector.Sema, true, cfg.Allowlists)
 		require.NoError(t, err)
 
 		findings, err := detector.DetectFiles(paths)
@@ -1157,7 +1381,7 @@ let password = 'Summer2024!';`
 
 			f := tc.fragment
 			f.Raw = raw
-			actual := d.detectRule(f, raw, rule, []EncodedSegment{})
+			actual := d.detectRule(f, raw, rule, []*codec.EncodedSegment{})
 			if diff := cmp.Diff(tc.expected, actual); diff != "" {
 				t.Errorf("diff: (-want +got)\n%s", diff)
 			}
@@ -1211,7 +1435,6 @@ func TestNormalizeGitleaksIgnorePaths(t *testing.T) {
 }
 
 func TestWindowsFileSeparator_RulePath(t *testing.T) {
-	logging.Logger = logging.Logger.Level(zerolog.TraceLevel)
 	unixRule := config.Rule{
 		RuleID: "test-rule",
 		Path:   regexp.MustCompile(`(^|/)\.m2/settings\.xml`),
@@ -1319,7 +1542,7 @@ func TestWindowsFileSeparator_RulePath(t *testing.T) {
 	require.NoError(t, err)
 	for name, test := range tests {
 		t.Run(name, func(t *testing.T) {
-			actual := d.detectRule(test.fragment, test.fragment.Raw, test.rule, []EncodedSegment{})
+			actual := d.detectRule(test.fragment, test.fragment.Raw, test.rule, []*codec.EncodedSegment{})
 			if diff := cmp.Diff(test.expected, actual); diff != "" {
 				t.Errorf("diff: (-want +got)\n%s", diff)
 			}
@@ -1505,7 +1728,7 @@ func TestWindowsFileSeparator_RuleAllowlistPaths(t *testing.T) {
 	require.NoError(t, err)
 	for name, test := range tests {
 		t.Run(name, func(t *testing.T) {
-			actual := d.detectRule(test.fragment, test.fragment.Raw, test.rule, []EncodedSegment{})
+			actual := d.detectRule(test.fragment, test.fragment.Raw, test.rule, []*codec.EncodedSegment{})
 			if diff := cmp.Diff(test.expected, actual); diff != "" {
 				t.Errorf("diff: (-want +got)\n%s", diff)
 			}

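For reference, the test table above now uses a map keyed by a descriptive name instead of an anonymous slice, so t.Run reports a stable, human-readable subtest name rather than a cfgName/FilePath combination. A minimal sketch of the pattern (scan and the cases are illustrative, not taken from the gitleaks code):

package example

import (
	"strings"
	"testing"
)

// scan is a stand-in for the detector: one finding per known marker.
func scan(s string) []string {
	if strings.Contains(s, "mockSecret") {
		return []string{"mockSecret"}
	}
	return nil
}

func TestDetectPattern(t *testing.T) {
	tests := map[string]struct {
		input string
		want  int
	}{
		"empty input yields no findings":   {input: "", want: 0},
		"single secret yields one finding": {input: `token = "mockSecret"`, want: 1},
	}
	for name, tt := range tests {
		t.Run(name, func(t *testing.T) {
			if got := len(scan(tt.input)); got != tt.want {
				t.Errorf("got %d findings, want %d", got, tt.want)
			}
		})
	}
}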
+ 4 - 6
detect/directory.go

@@ -20,9 +20,8 @@ const maxPeekSize = 25 * 1_000 // 25kb
 // DetectFiles schedules each ScanTarget—file or archive—for concurrent scanning.
 func (d *Detector) DetectFiles(paths <-chan sources.ScanTarget) ([]report.Finding, error) {
 	for pa := range paths {
-		pa := pa // capture
 		d.Sema.Go(func() error {
-			return d.DetectScanTarget(pa)
+			return d.detectScanTarget(pa)
 		})
 	}
 
@@ -32,10 +31,9 @@ func (d *Detector) DetectFiles(paths <-chan sources.ScanTarget) ([]report.Findin
 	return d.findings, nil
 }
 
-// DetectScanTarget handles one ScanTarget: it unpacks archives recursively
+// detectScanTarget handles one ScanTarget: it unpacks archives recursively
 // or scans a regular file, always using VirtualPath for reporting.
-// TODO maybe find a better solution for this? relying on `scanTarget` seems off.
-func (d *Detector) DetectScanTarget(scanTarget sources.ScanTarget) error {
+func (d *Detector) detectScanTarget(scanTarget sources.ScanTarget) error {
 	// Choose display path: either VirtualPath (archive chain) or on-disk path.
 	display := scanTarget.Path
 	if scanTarget.VirtualPath != "" {
@@ -71,7 +69,7 @@ func (d *Detector) DetectScanTarget(scanTarget sources.ScanTarget) error {
 			}
 
 			d.Sema.Go(func() error {
-				return d.DetectScanTarget(t)
+				return d.detectScanTarget(t)
 			})
 		}
 

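The dropped `pa := pa // capture` line is safe because, since Go 1.22, a range loop creates a fresh variable per iteration, so each queued closure already sees its own copy. A hedged sketch of the semgroup scheduling shape used here (the paths and work body are illustrative):

package example

import (
	"context"
	"fmt"

	"github.com/fatih/semgroup"
)

func main() {
	// Bound concurrency to 4 workers, as the detector does with its semaphore.
	g := semgroup.NewGroup(context.Background(), 4)
	for _, p := range []string{"a.txt", "b.txt", "c.txt"} {
		// No `p := p` copy needed on Go 1.22+: p is per-iteration.
		g.Go(func() error {
			fmt.Println("scanning", p)
			return nil
		})
	}
	if err := g.Wait(); err != nil {
		fmt.Println("scan error:", err)
	}
}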
+ 16 - 4
detect/git.go

@@ -38,9 +38,11 @@ func (d *Detector) DetectGit(cmd *sources.GitCmd, remote *RemoteInfo) ([]report.
 			commitSHA := ""
 			if gitdiffFile.PatchHeader != nil {
 				commitSHA = gitdiffFile.PatchHeader.SHA
-				if ok, c := d.Config.Allowlist.CommitAllowed(gitdiffFile.PatchHeader.SHA); ok {
-					logging.Trace().Str("allowed-commit", c).Msg("skipping commit: global allowlist")
-					continue
+				for _, a := range d.Config.Allowlists {
+					if ok, c := a.CommitAllowed(gitdiffFile.PatchHeader.SHA); ok {
+						logging.Trace().Str("allowed-commit", c).Msg("skipping commit: global allowlist")
+						continue
+					}
 				}
 			}
 
@@ -85,7 +87,7 @@ func (d *Detector) DetectGit(cmd *sources.GitCmd, remote *RemoteInfo) ([]report.
 						t.GitInfo.Message = gitdiffFile.PatchHeader.Message()
 						t.GitInfo.Email = gitdiffFile.PatchHeader.Author.Email
 
-						d.DetectScanTarget(t)
+						d.detectScanTarget(t)
 					}
 					os.RemoveAll(tmpDir)
 					return nil
@@ -111,9 +113,19 @@ func (d *Detector) DetectGit(cmd *sources.GitCmd, remote *RemoteInfo) ([]report.
 						FilePath:  gitdiffFile.NewName,
 					}
 
+					timer := time.AfterFunc(SlowWarningThreshold, func() {
+						logging.Debug().
+							Str("commit", commitSHA[:7]).
+							Str("path", fragment.FilePath).
+							Msgf("Taking longer than %s to inspect fragment", SlowWarningThreshold.String())
+					})
 					for _, finding := range d.Detect(fragment) {
 					for _, finding := range d.Detect(fragment) {
 						d.AddFinding(augmentGitFinding(remote, finding, textFragment, gitdiffFile))
 					}
+						timer.Stop()
+						timer = nil
+					}
 				}
 				return nil
 			})

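The new timer above is a watchdog: time.AfterFunc fires its callback once if the fragment is still being inspected when SlowWarningThreshold elapses, and Stop cancels it on the fast path. A standalone sketch of the same shape (the threshold, logger, and work function are illustrative):

package example

import (
	"log"
	"time"
)

func inspectWithSlowWarning(path string, inspect func()) {
	const slowWarningThreshold = 3 * time.Second
	// Fires only if inspect is still running once the threshold elapses.
	timer := time.AfterFunc(slowWarningThreshold, func() {
		log.Printf("taking longer than %s to inspect %s", slowWarningThreshold, path)
	})
	defer timer.Stop() // stopping an already-fired timer is a harmless no-op
	inspect()
}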
+ 12 - 5
sources/directory.go

@@ -8,6 +8,7 @@ import (
 
 	"github.com/fatih/semgroup"
 
+	"github.com/zricethezav/gitleaks/v8/config"
 	"github.com/zricethezav/gitleaks/v8/logging"
 )
 
@@ -29,7 +30,7 @@ type ScanTarget struct {
 
 var isWindows = runtime.GOOS == "windows"
 
-func DirectoryTargets(source string, s *semgroup.Group, followSymlinks bool, shouldSkip func(string) bool) (<-chan ScanTarget, error) {
+func DirectoryTargets(source string, s *semgroup.Group, followSymlinks bool, allowlists []*config.Allowlist) (<-chan ScanTarget, error) {
 	paths := make(chan ScanTarget)
 	s.Go(func() error {
 		defer close(paths)
@@ -77,10 +78,16 @@ func DirectoryTargets(source string, s *semgroup.Group, followSymlinks bool, sho
 				}
 
 				// TODO: Also run this check against the resolved symlink?
-				skip := shouldSkip(path) ||
-					// TODO: Remove this in v9.
-					// This is an awkward hack to mitigate https://github.com/gitleaks/gitleaks/issues/1641.
-					(isWindows && shouldSkip(filepath.ToSlash(path)))
+				var skip bool
+				for _, a := range allowlists {
+					skip = a.PathAllowed(path) ||
+						// TODO: Remove this in v9.
+						// This is an awkward hack to mitigate https://github.com/gitleaks/gitleaks/issues/1641.
+						(isWindows && a.PathAllowed(filepath.ToSlash(path)))
+					if skip {
+						break
+					}
+				}
 				if fInfo.IsDir() {
 					// Directory
 					if skip {

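DirectoryTargets now receives the whole []*config.Allowlist instead of a single shouldSkip callback, and a path is skipped as soon as any allowlist claims it. The short-circuit shape, reduced to its essentials (the allowlist type here is a stand-in, not the config package's):

package example

type allowlist struct {
	pathAllowed func(path string) bool
}

// shouldSkip reports whether any allowlist matches the path,
// stopping at the first match like the loop in the diff above.
func shouldSkip(path string, allowlists []*allowlist) bool {
	for _, a := range allowlists {
		if a.pathAllowed(path) {
			return true
		}
	}
	return false
}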
+ 1 - 1
testdata/config/base64_encoded.toml → testdata/config/encoded.toml

@@ -70,7 +70,7 @@
   # goes outside the bounds of the encoded value
   id = 'overlapping'
   description = 'Overlapping'
-  regex = '''secret=(decoded-secret-value)'''
+  regex = '''secret=(decoded-secret-value\w*)'''
   tags = ['overlapping']
   secretGroup = 1
 

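The appended \w* widens the capture so the single `overlapping` rule also matches the suffixed variants exercised by the new decoding tests (valuev2, valuevHEX, valuex86, and so on). A quick check with Go's regexp package:

package example

import (
	"fmt"
	"regexp"
)

func main() {
	re := regexp.MustCompile(`secret=(decoded-secret-value\w*)`)
	for _, s := range []string{
		"secret=decoded-secret-value",
		"secret=decoded-secret-valuev2",
		"secret=decoded-secret-valuevHEX",
	} {
		fmt.Println(re.FindStringSubmatch(s)[1]) // group 1 is the secret
	}
}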
+ 2 - 2
testdata/config/generic.toml

@@ -1,10 +1,10 @@
 title = "gitleaks config"
 
 [[rules]]
-description = "Generic API Key"
 id = "generic-api-key"
+description = "Generic API Key"
 regex = '''(?i)(?:key|api|token|secret|client|passwd|password|auth|access)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([0-9a-z\-_.=]{10,150})(?:['|\"|\n|\r|\s|\x60|;]|$)'''
 entropy = 3.5
 keywords = [
     "key","api","token","secret","client","passwd","password","auth","access",
-]
+]

+ 8 - 9
testdata/config/generic_with_py_path.toml

@@ -1,15 +1,20 @@
 title = "gitleaks config"
 
 [[rules]]
-description = "Generic API Key"
 id = "generic-api-key"
-regex = '''(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]'''
+description = "Generic API Key"
 path = '''.py'''
-entropy = 3.7
+regex = '''(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]'''
 secretGroup = 4
+entropy = 3.7
 
 [allowlist]
 description = "global allow lists"
+paths = [
+	'''gitleaks.toml''',
+	'''(.*?)(jpg|gif|doc|pdf|bin|svg|socket)$''',
+	'''(go.mod|go.sum)$'''
+]
 regexes = [
     '''219-09-9999''', 
     '''078-05-1120''', 
@@ -27,10 +32,4 @@ regexes = [
 	'''api\_key''',
 	'''apikey''',
 	'''api\-key''',
-    ]
-paths = [
-    '''gitleaks.toml''',
-    '''(.*?)(jpg|gif|doc|pdf|bin|svg|socket)$''',
-    '''(go.mod|go.sum)$'''
 ]
-

+ 2 - 0
testdata/config/invalid/allowlist_global_empty.toml

@@ -0,0 +1,2 @@
+
+[[allowlists]]

+ 4 - 0
testdata/config/invalid/allowlist_global_old_and_new.toml

@@ -0,0 +1,4 @@
+[allowlist]
+regexes = ['''123''']
+[[allowlists]]
+regexes = ['''456''']

+ 3 - 0
testdata/config/invalid/allowlist_global_regextarget.toml

@@ -0,0 +1,3 @@
+[[allowlists]]
+regexTarget = "mtach"
+regexes = ['''456''']

+ 7 - 0
testdata/config/invalid/allowlist_global_target_rule_id.toml

@@ -0,0 +1,7 @@
+[[rules]]
+id = "github-app-token"
+regex = '''(?:ghu|ghs)_[0-9a-zA-Z]{36}'''
+
+[[allowlists]]
+targetRules = ["github-app-token", "github-pat"]
+regexes = ['''.*fake.*''']

+ 0 - 2
testdata/config/allowlist_invalid_empty.toml → testdata/config/invalid/allowlist_rule_empty.toml

@@ -1,5 +1,3 @@
-title = "simple config with allowlist for aws"
-
 [[rules]]
 id = "example"
 regex = '''example\d+'''

+ 0 - 2
testdata/config/allowlist_invalid_old_and_new.toml → testdata/config/invalid/allowlist_rule_old_and_new.toml

@@ -1,5 +1,3 @@
-title = "simple config with allowlist for aws"
-
 [[rules]]
 id = "example"
 regex = '''example\d+'''

+ 0 - 2
testdata/config/allowlist_invalid_regextarget.toml → testdata/config/invalid/allowlist_rule_regextarget.toml

@@ -1,5 +1,3 @@
-title = "simple config with allowlist for aws"
-
 [[rules]]
 id = "example"
 regex = '''example\d+'''

+ 5 - 3
testdata/config/allow_global_aws_re.toml → testdata/config/invalid/extend_invalid_base.toml

@@ -1,8 +1,10 @@
+title = "gitleaks extended 1"
+
+[extend]
+path="../testdata/config/invalid/does_not_exist.toml"
+
 [[rules]]
     description = "AWS Access Key"
     id = "aws-access-key"
     regex = '''(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]
-
-[allowlist]
-    regexes = ['''AKIALALEMEL33243OLIA''']

+ 0 - 0
testdata/config/bad_entropy_group.toml → testdata/config/invalid/rule_bad_entropy_group.toml


+ 0 - 0
testdata/config/missing_id.toml → testdata/config/invalid/rule_missing_id.toml


+ 0 - 0
testdata/config/no_regex_or_path.toml → testdata/config/invalid/rule_no_regex_or_path.toml


+ 6 - 0
testdata/config/simple.toml

@@ -1,6 +1,12 @@
 title = "gitleaks config"
 # https://learnxinyminutes.com/docs/toml/ for toml reference
 
+[[rules]]
+    description = "1Password Secret Key"
+    id = "1password-secret-key"
+    regex = '''A3-[A-Z0-9]{6}-(?:(?:[A-Z0-9]{11})|(?:[A-Z0-9]{6}-[A-Z0-9]{5}))-[A-Z0-9]{5}-[A-Z0-9]{5}-[A-Z0-9]{5}'''
+    tags = ["1Password"]
+
 [[rules]]
     description = "AWS Access Key"
     id = "aws-access-key"

+ 10 - 0
testdata/config/valid/allowlist_global_multiple.toml

@@ -0,0 +1,10 @@
+[[rules]]
+id = "test"
+regex = '''token = "(.+)"'''
+
+[[allowlists]]
+regexes = ["^changeit$"]
+[[allowlists]]
+condition = "AND"
+paths = ["^node_modules/.*"]
+stopwords = ["mock"]
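This fixture pairs a default (OR-style) allowlist with a condition = "AND" one: the first ignores a finding when any regex matches, the second only when every listed criterion (path and stopword) matches. A hedged sketch of the two evaluation modes (not the gitleaks implementation):

package example

// evalConditions models the difference: OR allows on any true condition,
// AND requires all of them to be true.
func evalConditions(conds []func() bool, requireAll bool) bool {
	if len(conds) == 0 {
		return false
	}
	for _, c := range conds {
		if c() != requireAll {
			// OR: a true condition short-circuits to allowed.
			// AND: a false condition short-circuits to not allowed.
			return !requireAll
		}
	}
	return requireAll
}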

+ 2 - 0
testdata/config/valid/allowlist_global_old_compat.toml

@@ -0,0 +1,2 @@
+[allowlist]
+stopwords = ["0989c462-69c9-49fa-b7d2-30dc5c576a97"]

+ 2 - 0
testdata/config/valid/allowlist_global_regex.toml

@@ -0,0 +1,2 @@
+[allowlist]
+    regexes = ['''AKIALALEM.L33243OLIA''']

+ 20 - 0
testdata/config/valid/allowlist_global_target_rules.toml

@@ -0,0 +1,20 @@
+[[rules]]
+id = "github-app-token"
+regex = '''(?:ghu|ghs)_[0-9a-zA-Z]{36}'''
+
+[[rules]]
+id = "github-oauth"
+regex = '''gho_[0-9a-zA-Z]{36}'''
+
+[[rules]]
+id = "github-pat"
+regex = '''ghp_[0-9a-zA-Z]{36}'''
+
+
+[[allowlists]]
+regexes = ['''.*fake.*''']
+[[allowlists]]
+targetRules = ["github-app-token", "github-pat"]
+paths = [
+    '''(?:^|/)@octokit/auth-token/README\.md$''',
+]
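Here the first [[allowlists]] entry stays global, while the second is scoped by targetRules to just the named rule IDs. A minimal sketch of that dispatch (illustrative, assuming rule IDs are plain strings):

package example

// appliesTo reports whether an allowlist entry with the given targetRules
// should be consulted for a finding from ruleID; an empty list means global.
func appliesTo(targetRules []string, ruleID string) bool {
	if len(targetRules) == 0 {
		return true
	}
	for _, id := range targetRules {
		if id == ruleID {
			return true
		}
	}
	return false
}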

+ 0 - 0
testdata/config/allow_commit.toml → testdata/config/valid/allowlist_rule_commit.toml


+ 11 - 0
testdata/config/valid/allowlist_rule_extend_default.toml

@@ -0,0 +1,11 @@
+# https://github.com/gitleaks/gitleaks/issues/1844
+[extend]
+useDefault = true
+
+[[rules]]
+id = "generic-api-key"
+[[rules.allowlists]]
+description = "Exclude a specific file from generic-api-key rule"
+paths = [
+    '''^path/to/your/problematic/file\.js$'''
+]

+ 0 - 2
testdata/config/allowlist_old_compat.toml → testdata/config/valid/allowlist_rule_old_compat.toml

@@ -1,5 +1,3 @@
-title = "simple config with allowlist for aws"
-
 [[rules]]
     id = "example"
     regex = '''example\d+'''

+ 0 - 0
testdata/config/allow_path.toml → testdata/config/valid/allowlist_rule_path.toml


+ 0 - 0
testdata/config/allow_aws_re.toml → testdata/config/valid/allowlist_rule_regex.toml


+ 1 - 3
testdata/config/base.toml → testdata/config/valid/extend.toml

@@ -1,7 +1,5 @@
-title = "gitleaks config"
-
 [extend]
-path="../testdata/config/extend_1.toml"
+path="../testdata/config/valid/extend_base_1.toml"
 
 [[rules]]
     description = "AWS Secret Key"

+ 1 - 1
testdata/config/extend_1.toml → testdata/config/valid/extend_base_1.toml

@@ -1,7 +1,7 @@
 title = "gitleaks extended 1"
 
 [extend]
-path="../testdata/config/extend_2.toml"
+path="../testdata/config/valid/extend_base_2.toml"
 
 [[rules]]
     description = "AWS Access Key"

+ 0 - 0
testdata/config/extend_2.toml → testdata/config/valid/extend_base_2.toml


+ 0 - 0
testdata/config/extend_3.toml → testdata/config/valid/extend_base_3.toml


+ 1 - 1
testdata/config/extend_base_rule_including_keysword_with_attribute.toml → testdata/config/valid/extend_base_rule_including_keywords_with_attribute.toml

@@ -1,7 +1,7 @@
 title = "gitleaks extended 3"
 
 [extend]
-path="../testdata/config/extend_rule_keywords_base.toml"
+path="../testdata/config/valid/extend_rule_keywords_base.toml"
 
 [[rules]]
     id = "aws-secret-key-again-again"

+ 1 - 1
testdata/config/extend_disabled.toml → testdata/config/valid/extend_disabled.toml

@@ -1,7 +1,7 @@
 title = "gitleaks extend disable"
 
 [extend]
-path = "../testdata/config/extend_disabled_base.toml"
+path = "../testdata/config/valid/extend_disabled_base.toml"
 disabledRules = [
     'custom-rule1'
 ]

+ 0 - 0
testdata/config/extend_disabled_base.toml → testdata/config/valid/extend_disabled_base.toml


+ 1 - 1
testdata/config/extend_rule_allowlist_and.toml → testdata/config/valid/extend_rule_allowlist_and.toml

@@ -1,7 +1,7 @@
 title = "gitleaks extended 3"
 
 [extend]
-path="../testdata/config/extend_rule_allowlist_base.toml"
+path="../testdata/config/valid/extend_rule_allowlist_base.toml"
 
 [[rules]]
     id = "aws-secret-key-again-again"

+ 0 - 0
testdata/config/extend_rule_allowlist_base.toml → testdata/config/valid/extend_rule_allowlist_base.toml


+ 1 - 1
testdata/config/extend_rule_allowlist_or.toml → testdata/config/valid/extend_rule_allowlist_or.toml

@@ -1,7 +1,7 @@
 title = "gitleaks extended 3"
 
 [extend]
-path="../testdata/config/extend_rule_allowlist_base.toml"
+path="../testdata/config/valid/extend_rule_allowlist_base.toml"
 
 [[rules]]
     id = "aws-secret-key-again-again"

+ 0 - 0
testdata/config/extend_rule_keywords_base.toml → testdata/config/valid/extend_rule_keywords_base.toml


+ 1 - 1
testdata/config/extend_with_new_rule.toml → testdata/config/valid/extend_rule_new.toml

@@ -1,7 +1,7 @@
 title = "gitleaks extended 3"
 
 [extend]
-path="../testdata/config/extend_rule_keywords_base.toml"
+path="../testdata/config/valid/extend_rule_keywords_base.toml"
 
 [[rules]]
     id = "aws-rule-that-is-not-in-base"

+ 1 - 1
testdata/config/extend_empty_regexpath.toml → testdata/config/valid/extend_rule_no_regexpath.toml

@@ -1,5 +1,5 @@
 [extend]
-path="../testdata/config/extend_3.toml"
+path="../testdata/config/valid/extend_base_3.toml"
 
 [[rules]]
 id = "aws-secret-key-again-again"

+ 0 - 0
testdata/config/override_description.toml → testdata/config/valid/extend_rule_override_description.toml


+ 0 - 0
testdata/config/override_entropy.toml → testdata/config/valid/extend_rule_override_entropy.toml


+ 0 - 0
testdata/config/override_keywords.toml → testdata/config/valid/extend_rule_override_keywords.toml


+ 0 - 0
testdata/config/override_path.toml → testdata/config/valid/extend_rule_override_path.toml


+ 0 - 0
testdata/config/override_regex.toml → testdata/config/valid/extend_rule_override_regex.toml


+ 0 - 0
testdata/config/override_secret_group.toml → testdata/config/valid/extend_rule_override_secret_group.toml


+ 0 - 0
testdata/config/override_tags.toml → testdata/config/valid/extend_rule_override_tags.toml


+ 0 - 2
testdata/config/entropy_group.toml → testdata/config/valid/rule_entropy_group.toml

@@ -1,5 +1,3 @@
-title = "gitleaks config"
-
 [[rules]]
 id = "discord-api-key"
 description = "Discord API key"

+ 0 - 2
testdata/config/path_only.toml → testdata/config/valid/rule_path_only.toml

@@ -1,5 +1,3 @@
-title = "gitleaks config"
-
 [[rules]]
 description = "Python Files"
 id = "python-files-only"

+ 1 - 4
testdata/config/escaped_character_group.toml → testdata/config/valid/rule_regex_escaped_character_group.toml

@@ -1,8 +1,5 @@
-title = "gitleaks config"
-# https://learnxinyminutes.com/docs/toml/ for toml reference
-
 [[rules]]
     id = "pypi-upload-token"
     description = "PyPI upload token"
     regex = '''pypi-AgEIcHlwaS5vcmc[A-Za-z0-9\-_]{50,1000}'''
-    tags = ["key", "pypi"]
+    tags = ["key", "pypi"]