@@ -25,6 +25,8 @@ const (
chunkSize = 10 * 1_000 // 10kb
)
+var newLineRegexp = regexp.MustCompile("\n")
+
// Detector is the main detector struct
type Detector struct {
// Config is the configuration for the detector
@@ -38,6 +40,9 @@ type Detector struct {
// verbose is a flag to print findings
Verbose bool
+ // MaxDecodeDepth limits how many recursive decoding passes are allowed
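+ // a value of 0 disables decoding entirely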
+ MaxDecodeDepth int
+
// files larger than this will be skipped
MaxTargetMegaBytes int
@@ -95,10 +100,6 @@ type Fragment struct {
// newlineIndices is a list of indices of newlines in the raw content.
// This is used to calculate the line location of a finding
newlineIndices [][]int
-
- // keywords is a map of all the keywords contain within the contents
- // of this fragment
- keywords map[string]bool
}
// NewDetector creates a new detector with the given config
@@ -175,9 +176,6 @@ func (d *Detector) DetectString(content string) []report.Finding {
func (d *Detector) Detect(fragment Fragment) []report.Finding {
var findings []report.Finding
- // initiate fragment keywords
- fragment.keywords = make(map[string]bool)
-
// check if filepath is allowed
if fragment.FilePath != "" && (d.Config.Allowlist.PathAllowed(fragment.FilePath) ||
fragment.FilePath == d.Config.Path || (d.baselinePath != "" && fragment.FilePath == d.baselinePath)) {
@@ -185,38 +183,62 @@ func (d *Detector) Detect(fragment Fragment) []report.Finding {
}
// add newline indices for location calculation in detectRule
- fragment.newlineIndices = regexp.MustCompile("\n").FindAllStringIndex(fragment.Raw, -1)
+ fragment.newlineIndices = newLineRegexp.FindAllStringIndex(fragment.Raw, -1)
+
+ // set up variables to handle the different decoding passes
+ currentRaw := fragment.Raw
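+ // encodedSegments holds the segments decoded so far (empty on the first pass)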
+ encodedSegments := []EncodedSegment{}
+ currentDecodeDepth := 0
+ decoder := NewDecoder()
+
+ for {
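+ // each pass scans currentRaw with the rules, then decodes any encoded segments for the next pass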
+ // build keyword map for prefiltering rules
+ keywords := make(map[string]bool)
+ normalizedRaw := strings.ToLower(currentRaw)
+ matches := d.prefilter.MatchString(normalizedRaw)
+ for _, m := range matches {
+ keywords[normalizedRaw[m.Pos():int(m.Pos())+len(m.Match())]] = true
+ }
- // build keyword map for prefiltering rules
- normalizedRaw := strings.ToLower(fragment.Raw)
- matches := d.prefilter.MatchString(normalizedRaw)
- for _, m := range matches {
- fragment.keywords[normalizedRaw[m.Pos():int(m.Pos())+len(m.Match())]] = true
- }
+ for _, rule := range d.Config.Rules {
+ if len(rule.Keywords) == 0 {
+ // if no keywords are associated with the rule always scan the
+ // fragment using the rule
+ findings = append(findings, d.detectRule(fragment, currentRaw, rule, encodedSegments)...)
+ continue
+ }
- for _, rule := range d.Config.Rules {
- if len(rule.Keywords) == 0 {
- // if not keywords are associated with the rule always scan the
- // fragment using the rule
- findings = append(findings, d.detectRule(fragment, rule)...)
- continue
- }
- fragmentContainsKeyword := false
- // check if keywords are in the fragment
- for _, k := range rule.Keywords {
- if _, ok := fragment.keywords[strings.ToLower(k)]; ok {
- fragmentContainsKeyword = true
+ // check if keywords are in the fragment
+ for _, k := range rule.Keywords {
+ if _, ok := keywords[strings.ToLower(k)]; ok {
+ findings = append(findings, d.detectRule(fragment, currentRaw, rule, encodedSegments)...)
+ break
+ }
}
}
- if fragmentContainsKeyword {
- findings = append(findings, d.detectRule(fragment, rule)...)
+
+ // increment the depth by 1 as we start our decoding pass
+ currentDecodeDepth++
+
+ // stop the loop if we've hit our max decoding depth
+ if currentDecodeDepth > d.MaxDecodeDepth {
+ break
+ }
+
+ // decode the currentRaw for the next pass
+ currentRaw, encodedSegments = decoder.decode(currentRaw, encodedSegments)
+
+ // stop the loop when there's nothing else to decode
+ if len(encodedSegments) == 0 {
+ break
}
}
+
return filter(findings, d.Redact)
}
// detectRule scans the given fragment for the given rule and returns a list of findings
-func (d *Detector) detectRule(fragment Fragment, rule config.Rule) []report.Finding {
+func (d *Detector) detectRule(fragment Fragment, currentRaw string, rule config.Rule, encodedSegments []EncodedSegment) []report.Finding {
var findings []report.Finding
// check if filepath or commit is allowed for this rule
@@ -225,7 +247,7 @@ func (d *Detector) detectRule(fragment Fragment, rule config.Rule) []report.Find
return findings
}
- if rule.Path != nil && rule.Regex == nil {
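+ // path-only rules are skipped on decoding passes (encodedSegments is non-empty) to avoid duplicate findings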
+ if rule.Path != nil && rule.Regex == nil && len(encodedSegments) == 0 {
// Path _only_ rule
if rule.Path.MatchString(fragment.FilePath) {
finding := report.Finding{
@@ -252,23 +274,39 @@ func (d *Detector) detectRule(fragment Fragment, rule config.Rule) []report.Find
return findings
}
- // If flag configure and raw data size bigger then the flag
+ // if the flag is configured and the raw data size is bigger than the flag, skip scanning
if d.MaxTargetMegaBytes > 0 {
- rawLength := len(fragment.Raw) / 1000000
+ rawLength := len(currentRaw) / 1000000
if rawLength > d.MaxTargetMegaBytes {
log.Debug().Msgf("skipping file: %s scan due to size: %d", fragment.FilePath, rawLength)
return findings
}
}
- matchIndices := rule.Regex.FindAllStringIndex(fragment.Raw, -1)
- for _, matchIndex := range matchIndices {
- // extract secret from match
- secret := strings.Trim(fragment.Raw[matchIndex[0]:matchIndex[1]], "\n")
-
- // Fixes: https://github.com/gitleaks/gitleaks/issues/1352
- // removes the incorrectly following line that was detected by regex expression '\n'
- matchIndex[1] = matchIndex[0] + len(secret)
+ // use currentRaw instead of fragment.Raw since this represents the current
+ // decoding pass on the text
+ for _, matchIndex := range rule.Regex.FindAllStringIndex(currentRaw, -1) {
+ // Extract secret from match
+ secret := strings.Trim(currentRaw[matchIndex[0]:matchIndex[1]], "\n")
+
+ // metaTags collects any metadata tags from decoding
+ var metaTags []string
+
+ // Check if the decoded portions of the segment overlap with the match
+ // to see if it's potentially a new match
+ if len(encodedSegments) > 0 {
+ if segment := segmentWithDecodedOverlap(encodedSegments, matchIndex[0], matchIndex[1]); segment != nil {
+ matchIndex = segment.adjustMatchIndex(matchIndex)
+ metaTags = append(metaTags, segment.tags()...)
+ } else {
+ // This item has already been added to a finding
+ continue
+ }
+ } else {
+ // Fixes: https://github.com/gitleaks/gitleaks/issues/1352
+ // removes the following line that was incorrectly detected by a regex expression containing '\n'
+ matchIndex[1] = matchIndex[0] + len(secret)
+ }
// determine location of match. Note that the location
// in the finding will be the line/column numbers of the _match_
@@ -291,7 +329,7 @@ func (d *Detector) detectRule(fragment Fragment, rule config.Rule) []report.Find
EndColumn: loc.endColumn,
Secret: secret,
Match: secret,
- Tags: rule.Tags,
+ Tags: append(rule.Tags, metaTags...),
Line: fragment.Raw[loc.startLineIndex:loc.endLineIndex],
}
@@ -327,6 +365,12 @@ func (d *Detector) detectRule(fragment Fragment, rule config.Rule) []report.Find
}
}
+ // check if the secret is in the list of stopwords
+ if rule.Allowlist.ContainsStopWord(finding.Secret) ||
+ d.Config.Allowlist.ContainsStopWord(finding.Secret) {
+ continue
+ }
+
// check if the regexTarget is defined in the allowlist "regexes" entry
allowlistTarget := finding.Secret
switch rule.Allowlist.RegexTarget {
@@ -348,12 +392,6 @@ func (d *Detector) detectRule(fragment Fragment, rule config.Rule) []report.Find
continue
}
- // check if the secret is in the list of stopwords
- if rule.Allowlist.ContainsStopWord(finding.Secret) ||
- d.Config.Allowlist.ContainsStopWord(finding.Secret) {
- continue
- }
-
// check entropy
entropy := shannonEntropy(finding.Secret)
finding.Entropy = float32(entropy)