utils.go 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. package detect
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "math"
  6. "strings"
  7. "time"
  8. "github.com/zricethezav/gitleaks/v8/report"
  9. "github.com/gitleaks/go-gitdiff/gitdiff"
  10. "github.com/rs/zerolog/log"
  11. )
  12. // augmentGitFinding updates the start and end line numbers of a finding to include the
  13. // delta from the git diff
  14. func augmentGitFinding(finding report.Finding, textFragment *gitdiff.TextFragment, f *gitdiff.File) report.Finding {
  15. if !strings.HasPrefix(finding.Match, "file detected") {
  16. finding.StartLine += int(textFragment.NewPosition)
  17. finding.EndLine += int(textFragment.NewPosition)
  18. }
  19. if f.PatchHeader != nil {
  20. finding.Commit = f.PatchHeader.SHA
  21. finding.Message = f.PatchHeader.Message()
  22. if f.PatchHeader.Author != nil {
  23. finding.Author = f.PatchHeader.Author.Name
  24. finding.Email = f.PatchHeader.Author.Email
  25. }
  26. finding.Date = f.PatchHeader.AuthorDate.UTC().Format(time.RFC3339)
  27. }
  28. return finding
  29. }
  // shannonEntropy calculates the Shannon entropy of data, in bits per symbol,
  // using the formula defined here:
  // https://en.wiktionary.org/wiki/Shannon_entropy
  //
  // Another way to think about it: the result is the average number of bits
  // needed to encode one symbol of data. The higher the entropy, the more random
  // the data and the more bits are needed to encode it.
  //
  // Symbols are Unicode code points (runes), not bytes, and frequencies are
  // normalised by the number of runes so that they always sum to 1, including
  // for multi-byte UTF-8 input. The empty string has an entropy of 0.
  func shannonEntropy(data string) (entropy float64) {
  	if data == "" {
  		return 0
  	}

  	// Count occurrences per rune and the total number of runes in one pass.
  	// len(data) would be the byte length, which over-counts non-ASCII text.
  	charCounts := make(map[rune]int)
  	total := 0
  	for _, char := range data {
  		charCounts[char]++
  		total++
  	}

  	invLength := 1.0 / float64(total)
  	for _, count := range charCounts {
  		freq := float64(count) * invLength
  		entropy -= freq * math.Log2(freq)
  	}

  	return entropy
  }
  50. // filter will dedupe and redact findings
  51. func filter(findings []report.Finding, redact bool) []report.Finding {
  52. var retFindings []report.Finding
  53. for _, f := range findings {
  54. include := true
  55. if strings.Contains(strings.ToLower(f.RuleID), "generic") {
  56. for _, fPrime := range findings {
  57. if f.StartLine == fPrime.StartLine &&
  58. f.EndLine == fPrime.EndLine &&
  59. f.Commit == fPrime.Commit &&
  60. f.RuleID != fPrime.RuleID &&
  61. strings.Contains(fPrime.Secret, f.Secret) &&
  62. !strings.Contains(strings.ToLower(fPrime.RuleID), "generic") {
  63. genericMatch := strings.Replace(f.Match, f.Secret, "REDACTED", -1)
  64. betterMatch := strings.Replace(fPrime.Match, fPrime.Secret, "REDACTED", -1)
  65. log.Debug().Msgf("skipping %s finding (%s), %s rule takes precendence (%s)", f.RuleID, genericMatch, fPrime.RuleID, betterMatch)
  66. include = false
  67. break
  68. }
  69. }
  70. }
  71. if redact {
  72. f.Redact()
  73. }
  74. if include {
  75. retFindings = append(retFindings, f)
  76. }
  77. }
  78. return retFindings
  79. }
  80. func printFinding(f report.Finding) {
  81. var b []byte
  82. b, _ = json.MarshalIndent(f, "", " ")
  83. fmt.Println(string(b))
  84. }
  // containsDigit reports whether s contains at least one ASCII decimal digit
  // ('0' through '9'). Digits from other scripts are not considered. It returns
  // false for the empty string.
  func containsDigit(s string) bool {
  	// All ten digits are listed explicitly; '0' must be included.
  	return strings.ContainsAny(s, "0123456789")
  }