Преглед изворни кода

introducing secretGroup, the best group (#734)

* working on deduping

* my eyes... oh god my poor eyes

* more readme

* more readme

* more readme

* more readme and formatting
Zachary Rice пре 4 година
родитељ
комит
98d5648f6e

+ 25 - 3
README.md

@@ -127,7 +127,7 @@ Example output:
         "EndLine": 37,
         "StartColumn": 19,
         "EndColumn": 38,
-        "Context": "\t\t\"aws_secret= \\\"AKIAIMNOJVGFDXXXE4OA\\\"\":          true,",
+        "Match": "\t\t\"aws_secret= \\\"AKIAIMNOJVGFDXXXE4OA\\\"\":          true,",
         "Secret": "AKIAIMNOJVGFDXXXE4OA",
         "File": "checks_test.go",
         "Commit": "ec2fc9d6cb0954fb3b57201cf6133c48d8ca0d29",
@@ -192,8 +192,9 @@ regex = '''one-go-style-regex-for-this-rule'''
 path = '''a-file-path-regex'''
 # Array of strings used for metadata and reporting purposes.
 tags = ["tag","another tag"]
-# Int used to check shannon entropy of a specific group in a regex match. 
-entropyGroup = 3
+# Int selecting the regex capture group that contains the secret. The same group is
+# the one whose entropy is checked if `entropy` is set.
+secretGroup = 3
 # Float representing the minimum shannon entropy a regex group must have to be considered a secret. 
 entropy = 3.5
 # You can include an allowlist table for a single rule to reduce false positives or ignore commits
@@ -246,6 +247,27 @@ discord_client_secret = "8dyfuiRyq=vVc3RRr_edRk-fK__JItpZ"
                                                                                 Secret                
                                                                                                       
 
+#### A Note on Generic Secrets
+Let's continue with the example `discord_client_secret = "8dyfuiRyq=vVc3RRr_edRk-fK__JItpZ"`. 
+This secret would match both the `discord-client-secret` rule and the `generic-api-key` rule in the default config.
+```
+[[rules]]
+id = "discord-client-secret"
+description = "Discord client secret"
+regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9=_\-]{32})['\"]'''
+secretGroup = 3
+
+[[rules]]
+id = "generic-api-key"
+description = "Generic API Key"
+regex = '''(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]'''
+entropy = 3.7
+secretGroup = 4
+```
+If gitleaks encountered `discord_client_secret = "8dyfuiRyq=vVc3RRr_edRk-fK__JItpZ"`, only the `discord-client-secret` rule would report a finding, because
+the generic rule has the string `generic` somewhere in its `id`. When both a `generic` rule and a non-generic rule discover the same secret, the non-generic rule's finding
+is given precedence and the generic finding is not reported.
+
 
 ## Exit Codes
 You can always set the exit code when leaks are encountered with the --exit-code flag. Default exit codes below:

+ 16 - 16
config/config.go

@@ -15,13 +15,13 @@ var DefaultConfig string
 type ViperConfig struct {
 	Description string
 	Rules       []struct {
-		ID           string
-		Description  string
-		Entropy      float64
-		EntropyGroup int
-		Regex        string
-		Path         string
-		Tags         []string
+		ID          string
+		Description string
+		Entropy     float64
+		SecretGroup int
+		Regex       string
+		Path        string
+		Tags        []string
 
 		Allowlist struct {
 			Regexes []string
@@ -73,21 +73,21 @@ func (vc *ViperConfig) Translate() (Config, error) {
 			configPathRegex = regexp.MustCompile(r.Path)
 		}
 		r := &Rule{
-			Description:    r.Description,
-			RuleID:         r.ID,
-			Regex:          configRegex,
-			Path:           configPathRegex,
-			EntropyReGroup: r.EntropyGroup,
-			Entropy:        r.Entropy,
-			Tags:           r.Tags,
+			Description: r.Description,
+			RuleID:      r.ID,
+			Regex:       configRegex,
+			Path:        configPathRegex,
+			SecretGroup: r.SecretGroup,
+			Entropy:     r.Entropy,
+			Tags:        r.Tags,
 			Allowlist: Allowlist{
 				Regexes: allowlistRegexes,
 				Paths:   allowlistPaths,
 				Commits: r.Allowlist.Commits,
 			},
 		}
-		if r.Regex != nil && r.EntropyReGroup > r.Regex.NumSubexp() {
-			return Config{}, fmt.Errorf("%s invalid regex entropy group %d, max regex entropy group %d", r.Description, r.EntropyReGroup, r.Regex.NumSubexp())
+		if r.Regex != nil && r.SecretGroup > r.Regex.NumSubexp() {
+			return Config{}, fmt.Errorf("%s invalid regex secret group %d, max regex secret group %d", r.Description, r.SecretGroup, r.Regex.NumSubexp())
 		}
 		rules = append(rules, r)
 

+ 23 - 34
config/config_test.go

@@ -74,13 +74,13 @@ func TestTranslate(t *testing.T) {
 			cfg: Config{
 				Rules: []*Rule{
 					{
-						Description:    "Discord API key",
-						Regex:          regexp.MustCompile(`(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{64})['\"]`),
-						RuleID:         "discord-api-key",
-						Allowlist:      Allowlist{},
-						Entropy:        3.5,
-						EntropyReGroup: 3,
-						Tags:           []string{},
+						Description: "Discord API key",
+						Regex:       regexp.MustCompile(`(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{64})['\"]`),
+						RuleID:      "discord-api-key",
+						Allowlist:   Allowlist{},
+						Entropy:     3.5,
+						SecretGroup: 3,
+						Tags:        []string{},
 					},
 				},
 			},
@@ -88,7 +88,7 @@ func TestTranslate(t *testing.T) {
 		{
 			cfgName:   "bad_entropy_group",
 			cfg:       Config{},
-			wantError: fmt.Errorf("Discord API key invalid regex entropy group 5, max regex entropy group 3"),
+			wantError: fmt.Errorf("Discord API key invalid regex secret group 5, max regex secret group 3"),
 		},
 	}
 
@@ -125,45 +125,34 @@ func TestIncludeEntropy(t *testing.T) {
 	}{
 		{
 			rule: Rule{
-				RuleID:         "generic-api-key",
-				EntropyReGroup: 4,
-				Entropy:        3.5,
-				Regex:          regexp.MustCompile(`(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]`),
+				RuleID:      "generic-api-key",
+				SecretGroup: 4,
+				Entropy:     3.5,
+				Regex:       regexp.MustCompile(`(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]`),
 			},
-			secret:  `Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+			secret:  `e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5`,
 			entropy: 3.7906235872459746,
 			include: true,
 		},
 		{
 			rule: Rule{
-				RuleID:         "generic-api-key",
-				EntropyReGroup: 4,
-				Entropy:        4,
-				Regex:          regexp.MustCompile(`(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]`),
+				RuleID:      "generic-api-key",
+				SecretGroup: 4,
+				Entropy:     4,
+				Regex:       regexp.MustCompile(`(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]`),
 			},
-			secret:  `Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`,
+			secret:  `e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5`,
 			entropy: 3.7906235872459746,
 			include: false,
 		},
 		{
 			rule: Rule{
-				RuleID:         "generic-api-key",
-				EntropyReGroup: 4,
-				Entropy:        3.0,
-				Regex:          regexp.MustCompile(`(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]`),
+				RuleID:      "generic-api-key",
+				SecretGroup: 4,
+				Entropy:     3.0,
+				Regex:       regexp.MustCompile(`(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]`),
 			},
-			secret:  `KeyboardInteractiveName = "ssh-keyboard-interactive"`,
-			entropy: 0,
-			include: false,
-		},
-		{
-			rule: Rule{
-				RuleID:         "generic-api-key",
-				EntropyReGroup: 4,
-				Entropy:        3.0,
-				Regex:          regexp.MustCompile(`(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]`),
-			},
-			secret:  `KeyboardInteractiveName = "ssh-keyboard-interactive"`,
+			secret:  `ssh-keyboard-interactive`,
 			entropy: 0,
 			include: false,
 		},

+ 100 - 6
config/gitleaks.toml

@@ -135,6 +135,7 @@ regex = '''\"type\": \"service_account\"'''
 id = "heroku-api-key"
 description = "Heroku API Key"
 regex = ''' (?i)(heroku[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12})['\"]'''
+secretGroup = 3
 
 [[rules]]
 id = "slack-web-hook"
@@ -155,16 +156,19 @@ regex = '''AGE-SECRET-KEY-1[QPZRY9X8GF2TVDW0S3JN54KHCE6MUA7L]{58}'''
 id = "facebook-token"
 description = "Facebook token"
 regex = '''(?i)(facebook[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32})['\"]'''
+secretGroup = 3
 
 [[rules]]
 id = "twitter-token"
 description = "Twitter token"
 regex = '''(?i)(twitter[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{35,44})['\"]'''
+secretGroup = 3
 
 [[rules]]
 id = "adobe-client-id"
 description = "Adobe Client ID (Oauth Web)"
 regex = '''(?i)(adobe[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32})['\"]'''
+secretGroup = 3
 
 [[rules]]
 id = "adobe-client-secret"
@@ -180,265 +184,355 @@ regex = '''(LTAI)(?i)[a-z0-9]{20}'''
 id = "alibaba-secret-key"
 description = "Alibaba Secret Key"
 regex = '''(?i)(alibaba[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{30})['\"]'''
+secretGroup = 3
 
 [[rules]]
 id = "asana-client-id"
 description = "Asana Client ID"
 regex = '''(?i)(asana[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9]{16})['\"]'''
+secretGroup = 3
 
 [[rules]]
 id = "asana-client-secret"
 description = "Asana Client Secret"
 regex = '''(?i)(asana[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{32})['\"]'''
+secretGroup = 3
 
 [[rules]]
 id = "atlassian-api-token"
 description = "Atlassian API token"
 regex = '''(?i)(atlassian[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{24})['\"]'''
+secretGroup = 3
 
 [[rules]]
 id = "bitbucket-client-id"
 description = "Bitbucket client ID"
 regex = '''(?i)(bitbucket[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{32})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "bitbucket-client-secret"
 description = "Bitbucket client secret"
 regex = '''(?i)(bitbucket[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9_\-]{64})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "beamer-api-token"
 description = "Beamer API token"
 regex = '''(?i)(beamer[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"](b_[a-z0-9=_\-]{44})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "clojars-api-token"
 description = "Clojars API token"
 regex = '''(CLOJARS_)(?i)[a-z0-9]{60}'''
 
 [[rules]]
+id = "contentful-delivery-api-token"
 description = "Contentful delivery API token"
 regex = '''(?i)(contentful[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9\-=_]{43})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "contentful-preview-api-token"
 description = "Contentful preview API token"
 regex = '''(?i)(contentful[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9\-=_]{43})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "databricks-api-token"
 description = "Databricks API token"
 regex = '''dapi[a-h0-9]{32}'''
 
 [[rules]]
+id = "discord-api-token"
 description = "Discord API key"
 regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{64})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "discord-client-id"
 description = "Discord client ID"
 regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9]{18})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "discord-client-secret"
 description = "Discord client secret"
 regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9=_\-]{32})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "doppler-api-token"
 description = "Doppler API token"
 regex = '''['\"](dp\.pt\.)(?i)[a-z0-9]{43}['\"]'''
 
 [[rules]]
+id = "dropbox-api-secret"
 description = "Dropbox API secret/key"
 regex = '''(?i)(dropbox[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{15})['\"]'''
 
 [[rules]]
+id = "dropbox-sl-api-token"
 description = "Dropbox short lived API token"
 regex = '''(?i)(dropbox[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"](sl\.[a-z0-9\-=_]{135})['\"]'''
 
 [[rules]]
+id = "dropbox-ll-api-token"
 description = "Dropbox long lived API token"
 regex = '''(?i)(dropbox)(.{0,20})['\"](?i)[a-z0-9]{11}(AAAAAAAAAA)[a-z0-9-_=]{43}['\"]'''
 
 [[rules]]
+id = "duffel-api-token"
 description = "Duffel API token"
 regex = '''['\"]duffel_(test|live)_(?i)[a-z0-9_-]{43}['\"]'''
 
 [[rules]]
+id = "dynatrace-api-token"
 description = "Dynatrace API token"
 regex = '''['\"]dt0c01\.(?i)[a-z0-9]{24}\.[a-z0-9]{64}['\"]'''
 
 [[rules]]
+id = "easypost-api-token"
 description = "EasyPost API token"
 regex = '''['\"]EZAK(?i)[a-z0-9]{54}['\"]'''
 
 [[rules]]
+id = "easypost-test-api-token"
 description = "EasyPost test API token"
 regex = '''['\"]EZTK(?i)[a-z0-9]{54}['\"]'''
 
 [[rules]]
+id = "fastly-api-token"
 description = "Fastly API token"
 regex = '''(?i)(fastly[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9\-=_]{32})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "finicity-client-secret"
 description = "Finicity client secret"
 regex = '''(?i)(finicity[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{20})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "finicity-api-token"
 description = "Finicity API token"
 regex = '''(?i)(finicity[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "flutterweave-public-key"
 description = "Flutterweave public key"
 regex = '''FLWPUBK_TEST-(?i)[a-h0-9]{32}-X'''
 
 [[rules]]
+id = "flutterweave-secret-key"
 description = "Flutterweave secret key"
 regex = '''FLWSECK_TEST-(?i)[a-h0-9]{32}-X'''
 
 [[rules]]
+id = "flutterweave-enc-key"
 description = "Flutterweave encrypted key"
 regex = '''FLWSECK_TEST[a-h0-9]{12}'''
 
 [[rules]]
+id = "frameio-api-token"
 description = "Frame.io API token"
 regex = '''fio-u-(?i)[a-z0-9-_=]{64}'''
 
 [[rules]]
+id = "gocardless-api-token"
 description = "GoCardless API token"
 regex = '''['\"]live_(?i)[a-z0-9-_=]{40}['\"]'''
 
 [[rules]]
+id = "grafana-api-token"
 description = "Grafana API token"
 regex = '''['\"]eyJrIjoi(?i)[a-z0-9-_=]{72,92}['\"]'''
 
 [[rules]]
+id = "hashicorp-tf-api-token"
 description = "Hashicorp Terraform user/org API token"
 regex = '''['\"](?i)[a-z0-9]{14}\.atlasv1\.[a-z0-9-_=]{60,70}['\"]'''
 
 [[rules]]
+id = "hubspot-api-token"
 description = "Hubspot API token"
 regex = '''(?i)(hubspot[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{8}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{12})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "intercom-api-token"
 description = "Intercom API token"
 regex = '''(?i)(intercom[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9=_]{60})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "intercom-client-secret"
 description = "Intercom client secret/ID"
 regex = '''(?i)(intercom[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{8}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{12})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "ionic-api-token"
 description = "Ionic API token"
 regex = '''ion_(?i)[a-z0-9]{42}'''
 
 [[rules]]
+id = "linear-api-token"
 description = "Linear API token"
 regex = '''lin_api_(?i)[a-z0-9]{40}'''
 
 [[rules]]
+id = "linear-client-secret"
 description = "Linear client secret/ID"
 regex = '''(?i)(linear[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "lob-api-key"
 description = "Lob API Key"
 regex = '''(?i)(lob[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]((live|test)_[a-f0-9]{35})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "lob-pub-api-key"
 description = "Lob Publishable API Key"
 regex = '''(?i)(lob[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]((test|live)_pub_[a-f0-9]{31})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "mailchimp-api-key"
 description = "Mailchimp API key"
 regex = '''(?i)(mailchimp[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-f0-9]{32}-us20)['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "mailgun-private-api-token"
 description = "Mailgun private API token"
 regex = '''(?i)(mailgun[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"](key-[a-f0-9]{32})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "mailgun-pub-key"
 description = "Mailgun public validation key"
 regex = '''(?i)(mailgun[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"](pubkey-[a-f0-9]{32})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "mailgun-signing-key"
 description = "Mailgun webhook signing key"
 regex = '''(?i)(mailgun[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{32}-[a-h0-9]{8}-[a-h0-9]{8})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "mapbox-api-token"
 description = "Mapbox API token"
 regex = '''(?i)(pk\.[a-z0-9]{60}\.[a-z0-9]{22})'''
 
 [[rules]]
+id = "messagebird-api-token"
 description = "MessageBird API token"
 regex = '''(?i)(messagebird[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{25})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "messagebird-client-id"
 description = "MessageBird API client ID"
 regex = '''(?i)(messagebird[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{8}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{4}-[a-h0-9]{12})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "new-relic-user-api-key"
 description = "New Relic user API Key"
 regex = '''['\"](NRAK-[A-Z0-9]{27})['\"]'''
 
 [[rules]]
+id = "new-relic-user-api-id"
 description = "New Relic user API ID"
 regex = '''(?i)(newrelic[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([A-Z0-9]{64})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "new-relic-browser-api-token"
 description = "New Relic ingest browser API token"
 regex = '''['\"](NRJS-[a-f0-9]{19})['\"]'''
 
 [[rules]]
+id = "npm-access-token"
 description = "npm access token"
 regex = '''['\"](npm_(?i)[a-z0-9]{36})['\"]'''
 
 [[rules]]
+id = "planetscale-password"
 description = "Planetscale password"
 regex = '''pscale_pw_(?i)[a-z0-9\-_\.]{43}'''
 
 [[rules]]
+id = "planetscale-api-token"
 description = "Planetscale API token"
 regex = '''pscale_tkn_(?i)[a-z0-9\-_\.]{43}'''
 
 [[rules]]
+id = "postman-api-token"
 description = "Postman API token"
 regex = '''PMAK-(?i)[a-f0-9]{24}\-[a-f0-9]{34}'''
 
 [[rules]]
+id = "pulumi-api-token"
 description = "Pulumi API token"
 regex = '''pul-[a-f0-9]{40}'''
 
 [[rules]]
+id = "rubygems-api-token"
 description = "Rubygem API token"
 regex = '''rubygems_[a-f0-9]{48}'''
 
 [[rules]]
+id = "sendgrid-api-token"
 description = "Sendgrid API token"
 regex = '''SG\.(?i)[a-z0-9_\-\.]{66}'''
 
 [[rules]]
+id = "sendinblue-api-token"
 description = "Sendinblue API token"
 regex = '''xkeysib-[a-f0-9]{64}\-(?i)[a-z0-9]{16}'''
 
 [[rules]]
+id = "shippo-api-token"
 description = "Shippo API token"
 regex = '''shippo_(live|test)_[a-f0-9]{40}'''
 
 [[rules]]
+id = "linedin-client-secret"
 description = "Linkedin Client secret"
 regex = '''(?i)(linkedin[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z]{16})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "linedin-client-id"
 description = "Linkedin Client ID"
 regex = '''(?i)(linkedin[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{14})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "twitch-api-token"
 description = "Twitch API token"
 regex = '''(?i)(twitch[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-z0-9]{30})['\"]'''
+secretGroup = 3
 
 [[rules]]
+id = "typeform-api-token"
 description = "Typeform API token"
 regex = '''(?i)(typeform[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}(tfp_[a-z0-9\-_\.=]{59})'''
+secretGroup = 3
 
-# [[rules]]
-# id = "generic-api-key"
-# description = "Generic API Key"
-# regex = '''(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]'''
-# entropy = 3.7
-# entropyGroup = 4
+[[rules]]
+id = "generic-api-key"
+description = "Generic API Key"
+regex = '''(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]'''
+entropy = 3.7
+secretGroup = 4
 
 
 [allowlist]

+ 10 - 16
config/rule.go

@@ -6,23 +6,17 @@ import (
 )
 
 type Rule struct {
-	Description    string
-	RuleID         string
-	Entropy        float64
-	EntropyReGroup int
-	Regex          *regexp.Regexp
-	Path           *regexp.Regexp
-	Tags           []string
-	Allowlist      Allowlist
+	Description string
+	RuleID      string
+	Entropy     float64
+	SecretGroup int
+	Regex       *regexp.Regexp
+	Path        *regexp.Regexp
+	Tags        []string
+	Allowlist   Allowlist
 }
 
 func (r *Rule) IncludeEntropy(secret string) (bool, float64) {
-	groups := r.Regex.FindStringSubmatch(secret)
-	if len(groups)-1 > r.EntropyReGroup || len(groups) == 0 {
-		// Config validation should prevent this
-		return false, 0.0
-	}
-
 	// NOTE: this is a goofy hack to get around the fact that golang's regex engine
 	// does not support positive lookaheads. Ideally we would want to add a
 	// restriction on generic rules regex that requires the secret match group
@@ -30,12 +24,12 @@ func (r *Rule) IncludeEntropy(secret string) (bool, float64) {
 	// check if the ruleid is prepended with "generic" and enforces the
 	// secret contains both digits and alphabetical characters.
 	if strings.HasPrefix(r.RuleID, "generic") {
-		if !containsDigit(groups[r.EntropyReGroup]) {
+		if !containsDigit(secret) {
 			return false, 0.0
 		}
 	}
 	// group = 0 will check the entropy of the whole regex match
-	e := shannonEntropy(groups[r.EntropyReGroup])
+	e := shannonEntropy(secret)
 	if e > r.Entropy {
 		return true, e
 	}

+ 48 - 5
detect/detect.go

@@ -6,6 +6,7 @@ import (
 	"regexp"
 	"strings"
 
+	"github.com/rs/zerolog/log"
 	"github.com/zricethezav/gitleaks/v8/config"
 	"github.com/zricethezav/gitleaks/v8/report"
 )
@@ -42,7 +43,7 @@ func DetectFindings(cfg config.Config, b []byte, filePath string, commit string)
 						Description: r.Description,
 						File:        filePath,
 						RuleID:      r.RuleID,
-						Context:     fmt.Sprintf("file detected: %s", filePath),
+						Match:       fmt.Sprintf("file detected: %s", filePath),
 						Tags:        r.Tags,
 					}
 					findings = append(findings, f)
@@ -59,6 +60,7 @@ func DetectFindings(cfg config.Config, b []byte, filePath string, commit string)
 		matchIndices := r.Regex.FindAllIndex(b, -1)
 		for _, m := range matchIndices {
 			location := getLocation(linePairs, m[0], m[1])
+			secret := strings.Trim(string(b[m[0]:m[1]]), "\n")
 			f := report.Finding{
 				Description: r.Description,
 				File:        filePath,
@@ -67,8 +69,8 @@ func DetectFindings(cfg config.Config, b []byte, filePath string, commit string)
 				EndLine:     location.endLine,
 				StartColumn: location.startColumn,
 				EndColumn:   location.endColumn,
-				Secret:      strings.Trim(string(b[m[0]:m[1]]), "\n"),
-				Context:     limit(strings.Trim(string(b[location.startLineIndex:location.endLineIndex]), "\n")),
+				Secret:      secret,
+				Match:       secret,
 				Tags:        r.Tags,
 			}
 
@@ -76,8 +78,20 @@ func DetectFindings(cfg config.Config, b []byte, filePath string, commit string)
 				continue
 			}
 
+			// extract secret from secret group if set
+			if r.SecretGroup != 0 {
+				groups := r.Regex.FindStringSubmatch(secret)
+				if len(groups)-1 > r.SecretGroup || len(groups) == 0 {
+					// Config validation should prevent this
+					break
+				}
+				secret = groups[r.SecretGroup]
+				f.Secret = secret
+			}
+
+			// run the entropy check on the secret if the rule has entropy set
 			if r.EntropySet() {
-				include, entropy := r.IncludeEntropy(strings.Trim(string(b[m[0]:m[1]]), "\n"))
+				include, entropy := r.IncludeEntropy(secret)
 				if include {
 					f.Entropy = float32(entropy)
 					findings = append(findings, f)
@@ -88,7 +102,7 @@ func DetectFindings(cfg config.Config, b []byte, filePath string, commit string)
 		}
 	}
 
-	return findings
+	return dedupe(findings)
 }
 
 func limit(s string) string {
@@ -103,3 +117,32 @@ func printFinding(f report.Finding) {
 	b, _ = json.MarshalIndent(f, "", "	")
 	fmt.Println(string(b))
 }
+
+func dedupe(findings []report.Finding) []report.Finding {
+	var retFindings []report.Finding
+	for _, f := range findings {
+		include := true
+		if strings.Contains(strings.ToLower(f.RuleID), "generic") {
+			for _, fPrime := range findings {
+				if f.StartLine == fPrime.StartLine &&
+					f.EndLine == fPrime.EndLine &&
+					f.Commit == fPrime.Commit &&
+					f.RuleID != fPrime.RuleID &&
+					strings.Contains(fPrime.Secret, f.Secret) &&
+					!strings.Contains(strings.ToLower(fPrime.RuleID), "generic") {
+
+					genericMatch := strings.Replace(f.Match, f.Secret, "REDACTED", -1)
+					betterMatch := strings.Replace(fPrime.Match, fPrime.Secret, "REDACTED", -1)
+					log.Debug().Msgf("skipping %s finding (%s), %s rule takes precendence (%s)", f.RuleID, genericMatch, fPrime.RuleID, betterMatch)
+					include = false
+					break
+				}
+			}
+		}
+		if include {
+			retFindings = append(retFindings, f)
+		}
+	}
+
+	return retFindings
+}

+ 7 - 8
detect/detect_test.go

@@ -30,6 +30,7 @@ func TestDetectFindings(t *testing.T) {
 				{
 					Description: "AWS Access Key",
 					Secret:      "AKIALALEMEL33243OLIA",
+					Match:       "AKIALALEMEL33243OLIA",
 					File:        "tmp.go",
 					RuleID:      "aws-access-key",
 					Tags:        []string{"key", "AWS"},
@@ -62,7 +63,8 @@ func TestDetectFindings(t *testing.T) {
 			expectedFindings: []report.Finding{
 				{
 					Description: "Discord API key",
-					Secret:      "Discord_Public_Key = \"e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5\"",
+					Match:       "Discord_Public_Key = \"e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5\"",
+					Secret:      "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5",
 					File:        "tmp.go",
 					RuleID:      "discord-api-key",
 					Tags:        []string{},
@@ -83,7 +85,8 @@ func TestDetectFindings(t *testing.T) {
 			expectedFindings: []report.Finding{
 				{
 					Description: "Generic API Key",
-					Secret:      "Key = \"e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5\"",
+					Match:       "Key = \"e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5\"",
+					Secret:      "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5",
 					File:        "tmp.py",
 					RuleID:      "generic-api-key",
 					Tags:        []string{},
@@ -98,7 +101,7 @@ func TestDetectFindings(t *testing.T) {
 			expectedFindings: []report.Finding{
 				{
 					Description: "Python Files",
-					Context:     "file detected: tmp.py",
+					Match:       "file detected: tmp.py",
 					File:        "tmp.py",
 					RuleID:      "python-files-only",
 					Tags:        []string{},
@@ -110,7 +113,7 @@ func TestDetectFindings(t *testing.T) {
 			bytes:            []byte(`const Discord_Public_Key = "e7322523fb86ed64c836a979cf8465fbd436378c653c1db38f9ae87bc62a6fd5"`),
 			filePath:         "tmp.go",
 			expectedFindings: []report.Finding{},
-			wantError:        fmt.Errorf("Discord API key invalid regex entropy group 5, max regex entropy group 3"),
+			wantError:        fmt.Errorf("Discord API key invalid regex secret group 5, max regex secret group 3"),
 		},
 		{
 			cfgName:          "simple",
@@ -148,10 +151,6 @@ func TestDetectFindings(t *testing.T) {
 		}
 
 		findings := DetectFindings(cfg, tt.bytes, tt.filePath, tt.commit)
-		for _, f := range findings {
-			f.Context = "" // remove lines cause copying and pasting them has some wack formatting
-			f.Date = ""
-		}
 		assert.ElementsMatch(t, tt.expectedFindings, findings)
 	}
 }

+ 2 - 2
detect/files_test.go

@@ -29,7 +29,7 @@ func TestFromFiles(t *testing.T) {
 					EndLine:     19,
 					StartColumn: 16,
 					EndColumn:   35,
-					Context:     "\tawsToken := \"AKIALALEMEL33243OLIA\"",
+					Match:       "AKIALALEMEL33243OLIA",
 					Secret:      "AKIALALEMEL33243OLIA",
 					File:        "../testdata/repos/nogit/main.go",
 					RuleID:      "aws-access-key",
@@ -47,7 +47,7 @@ func TestFromFiles(t *testing.T) {
 					EndLine:     19,
 					StartColumn: 16,
 					EndColumn:   35,
-					Context:     "\tawsToken := \"AKIALALEMEL33243OLIA\"",
+					Match:       "AKIALALEMEL33243OLIA",
 					Secret:      "AKIALALEMEL33243OLIA",
 					File:        "../testdata/repos/nogit/main.go",
 					RuleID:      "aws-access-key",

+ 1 - 1
detect/git.go

@@ -59,7 +59,7 @@ func FromGit(files <-chan *gitdiff.File, cfg config.Config, outputOptions Option
 
 				for _, fi := range DetectFindings(cfg, []byte(tf.Raw(gitdiff.OpAdd)), f.NewName, commitSHA) {
 					// don't add to start/end lines if finding is from a file only rule
-					if !strings.HasPrefix(fi.Context, "file detected") {
+					if !strings.HasPrefix(fi.Match, "file detected") {
 						fi.StartLine += int(tf.NewPosition)
 						fi.EndLine += int(tf.NewPosition)
 					}

+ 1 - 1
detect/git_test.go

@@ -119,7 +119,7 @@ func TestFromGit(t *testing.T) {
 
 		findings := FromGit(files, cfg, tt.opts)
 		for _, f := range findings {
-			f.Context = "" // remove lines cause copying and pasting them has some wack formatting
+			f.Match = "" // remove lines cause copying and pasting them has some wack formatting
 			f.Date = ""
 		}
 		assert.ElementsMatch(t, tt.expectedFindings, findings)

+ 2 - 2
report/csv.go

@@ -17,7 +17,7 @@ func writeCsv(f []*Finding, w io.WriteCloser) error {
 		"Commit",
 		"File",
 		"Secret",
-		"Context",
+		"Match",
 		"StartLine",
 		"EndLine",
 		"StartColumn",
@@ -35,7 +35,7 @@ func writeCsv(f []*Finding, w io.WriteCloser) error {
 			f.Commit,
 			f.File,
 			f.Secret,
-			f.Context,
+			f.Match,
 			strconv.Itoa(f.StartLine),
 			strconv.Itoa(f.EndLine),
 			strconv.Itoa(f.StartColumn),

+ 1 - 1
report/csv_test.go

@@ -20,7 +20,7 @@ func TestWriteCSV(t *testing.T) {
 			findings: []*Finding{
 				{
 					RuleID:      "test-rule",
-					Context:     "line containing secret",
+					Match:       "line containing secret",
 					Secret:      "a secret",
 					StartLine:   1,
 					EndLine:     2,

+ 2 - 2
report/finding.go

@@ -11,7 +11,7 @@ type Finding struct {
 	StartColumn int
 	EndColumn   int
 
-	Context string
+	Match string
 
 	// Secret contains the full content of what is matched in
 	// the tree-sitter query.
@@ -37,6 +37,6 @@ type Finding struct {
 
 // Redact removes sensitive information from a finding.
 func (f *Finding) Redact() {
-	f.Context = strings.Replace(f.Context, f.Secret, "REDACTED", -1)
+	f.Match = strings.Replace(f.Match, f.Secret, "REDACTED", -1)
 	f.Secret = "REDACT"
 }

+ 2 - 2
report/finding_test.go

@@ -11,8 +11,8 @@ func TestRedact(t *testing.T) {
 			redact: true,
 			findings: []Finding{
 				{
-					Secret:  "line containing secret",
-					Context: "secret",
+					Secret: "line containing secret",
+					Match:  "secret",
 				},
 			}},
 	}

+ 1 - 1
report/json_test.go

@@ -22,7 +22,7 @@ func TestWriteJSON(t *testing.T) {
 
 					Description: "",
 					RuleID:      "test-rule",
-					Context:     "line containing secret",
+					Match:       "line containing secret",
 					Secret:      "a secret",
 					StartLine:   1,
 					EndLine:     2,

+ 1 - 1
report/sarif_test.go

@@ -29,7 +29,7 @@ func TestWriteSarif(t *testing.T) {
 
 					Description: "",
 					RuleID:      "test-rule",
-					Context:     "line containing secret",
+					Match:       "line containing secret",
 					Secret:      "a secret",
 					StartLine:   1,
 					EndLine:     2,

+ 1 - 1
testdata/config/bad_entropy_group.toml

@@ -4,5 +4,5 @@ title = "gitleaks config"
 id = "discord-api-key"
 description = "Discord API key"
 regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{64})['\"]'''
-entropyGroup = 5
+secretGroup = 5
 entropy = 3.5

+ 1 - 1
testdata/config/entropy_group.toml

@@ -4,5 +4,5 @@ title = "gitleaks config"
 id = "discord-api-key"
 description = "Discord API key"
 regex = '''(?i)(discord[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([a-h0-9]{64})['\"]'''
-entropyGroup = 3
+secretGroup = 3
 entropy = 3.5

+ 1 - 1
testdata/config/generic.toml

@@ -5,4 +5,4 @@ description = "Generic API Key"
 id = "generic-api-key"
 regex = '''(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]'''
 entropy = 3.7
-entropyGroup = 4
+secretGroup = 4

+ 1 - 1
testdata/config/generic_with_py_path.toml

@@ -6,4 +6,4 @@ id = "generic-api-key"
 regex = '''(?i)((key|api|token|secret|password)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z\-_=]{8,64})['\"]'''
 path = '''.py'''
 entropy = 3.7
-entropyGroup = 4
+secretGroup = 4

+ 1 - 1
testdata/expected/report/csv_simple.csv

@@ -1,2 +1,2 @@
-RuleID,Commit,File,Secret,Context,StartLine,EndLine,StartColumn,EndColumn,Author,Message,Date,Email
+RuleID,Commit,File,Secret,Match,StartLine,EndLine,StartColumn,EndColumn,Author,Message,Date,Email
 test-rule,0000000000000000,auth.py,a secret,line containing secret,1,2,1,2,John Doe,opps,10-19-2003,johndoe@gmail.com

+ 1 - 1
testdata/expected/report/json_simple.json

@@ -5,7 +5,7 @@
   "EndLine": 2,
   "StartColumn": 1,
   "EndColumn": 2,
-  "Context": "line containing secret",
+  "Match": "line containing secret",
   "Secret": "a secret",
   "File": "auth.py",
   "Commit": "0000000000000000",