| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- package detect
- import (
- "encoding/json"
- "fmt"
- "math"
- "strings"
- "time"
- "github.com/zricethezav/gitleaks/v8/report"
- "github.com/gitleaks/go-gitdiff/gitdiff"
- "github.com/rs/zerolog/log"
- )
- // augmentGitFinding updates the start and end line numbers of a finding to include the
- // delta from the git diff
- func augmentGitFinding(finding report.Finding, textFragment *gitdiff.TextFragment, f *gitdiff.File) report.Finding {
- if !strings.HasPrefix(finding.Match, "file detected") {
- finding.StartLine += int(textFragment.NewPosition)
- finding.EndLine += int(textFragment.NewPosition)
- }
- if f.PatchHeader != nil {
- finding.Commit = f.PatchHeader.SHA
- finding.Message = f.PatchHeader.Message()
- if f.PatchHeader.Author != nil {
- finding.Author = f.PatchHeader.Author.Name
- finding.Email = f.PatchHeader.Author.Email
- }
- finding.Date = f.PatchHeader.AuthorDate.UTC().Format(time.RFC3339)
- }
- finding.Fingerprint = fmt.Sprintf("%s:%s:%s:%d", finding.Commit, finding.File, finding.RuleID, finding.StartLine)
- return finding
- }
// shannonEntropy calculates the entropy of data using the formula defined here:
// https://en.wiktionary.org/wiki/Shannon_entropy
// Another way to think about what this is doing is calculating the number of bits
// needed to on average encode the data. So, the higher the entropy, the more random the data, the
// more bits needed to encode that data.
//
// Symbols are counted per rune (as yielded by ranging over the string), and the
// frequencies are normalised by the number of runes rather than the byte length,
// so multi-byte UTF-8 input yields frequencies that sum to 1. The empty string
// has an entropy of 0.
func shannonEntropy(data string) (entropy float64) {
	if data == "" {
		return 0
	}

	charCounts := make(map[rune]int)
	runeTotal := 0
	for _, char := range data {
		charCounts[char]++
		// Count runes here instead of using len(data), which is a byte count.
		runeTotal++
	}

	invLength := 1.0 / float64(runeTotal)
	for _, count := range charCounts {
		freq := float64(count) * invLength
		entropy -= freq * math.Log2(freq)
	}
	return entropy
}
- // filter will dedupe and redact findings
- func filter(findings []report.Finding, redact bool) []report.Finding {
- var retFindings []report.Finding
- for _, f := range findings {
- include := true
- if strings.Contains(strings.ToLower(f.RuleID), "generic") {
- for _, fPrime := range findings {
- if f.StartLine == fPrime.StartLine &&
- f.Commit == fPrime.Commit &&
- f.RuleID != fPrime.RuleID &&
- strings.Contains(fPrime.Secret, f.Secret) &&
- !strings.Contains(strings.ToLower(fPrime.RuleID), "generic") {
- genericMatch := strings.Replace(f.Match, f.Secret, "REDACTED", -1)
- betterMatch := strings.Replace(fPrime.Match, fPrime.Secret, "REDACTED", -1)
- log.Trace().Msgf("skipping %s finding (%s), %s rule takes precendence (%s)", f.RuleID, genericMatch, fPrime.RuleID, betterMatch)
- include = false
- break
- }
- }
- }
- if redact {
- f.Redact()
- }
- if include {
- retFindings = append(retFindings, f)
- }
- }
- return retFindings
- }
- func printFinding(f report.Finding) {
- var b []byte
- b, _ = json.MarshalIndent(f, "", " ")
- fmt.Println(string(b))
- }
// containsDigit reports whether s contains at least one ASCII decimal digit
// ('0' through '9'). It returns false for the empty string.
func containsDigit(s string) bool {
	for _, c := range s {
		// A range check covers every digit, including '0'.
		if '0' <= c && c <= '9' {
			return true
		}
	}
	return false
}
|