// rule.go

package config
import (
	"math"
	"path/filepath"
	"regexp"
)
  7. // Rule is a struct that contains information that is loaded from a gitleaks config.
  8. // This struct is used in the Config struct as an array of Rules and is iterated
  9. // over during an scan. Each rule will be checked. If a regex match is found AND
  10. // that match is not allowlisted (globally or locally), then a leak will be appended
  11. // to the final scan report.
  12. type Rule struct {
  13. Description string
  14. Regex *regexp.Regexp
  15. File *regexp.Regexp
  16. Path *regexp.Regexp
  17. ReportGroup int
  18. Tags []string
  19. AllowList AllowList
  20. Entropies []Entropy
  21. }
  22. // Inspect checks the content of a line for a leak
  23. func (r *Rule) Inspect(line string) string {
  24. offender := r.Regex.FindString(line)
  25. if offender == "" {
  26. return ""
  27. }
  28. // check if offender is allowed
  29. if r.RegexAllowed(line) {
  30. return ""
  31. }
  32. // check entropy
  33. groups := r.Regex.FindStringSubmatch(offender)
  34. if len(r.Entropies) != 0 && !r.ContainsEntropyLeak(groups) {
  35. return ""
  36. }
  37. // 0 is a match for the full regex pattern
  38. if 0 < r.ReportGroup && r.ReportGroup < len(groups) {
  39. offender = groups[r.ReportGroup]
  40. }
  41. return offender
  42. }
  43. // RegexAllowed checks if the content is allowlisted
  44. func (r *Rule) RegexAllowed(content string) bool {
  45. return anyRegexMatch(content, r.AllowList.Regexes)
  46. }
  47. // CommitAllowed checks if a commit is allowlisted
  48. func (r *Rule) CommitAllowed(commit string) bool {
  49. return r.AllowList.CommitAllowed(commit)
  50. }
  51. // ContainsEntropyLeak checks if there is an entropy leak
  52. func (r *Rule) ContainsEntropyLeak(groups []string) bool {
  53. for _, e := range r.Entropies {
  54. if len(groups) > e.Group {
  55. entropy := shannonEntropy(groups[e.Group])
  56. if entropy >= e.Min && entropy <= e.Max {
  57. return true
  58. }
  59. }
  60. }
  61. return false
  62. }
  63. // HasFileOrPathLeakOnly first checks if there are no entropy/regex rules, then checks if
  64. // there are any file/path leaks
  65. func (r *Rule) HasFileOrPathLeakOnly(filePath string) bool {
  66. if r.Regex.String() != "" {
  67. return false
  68. }
  69. if len(r.Entropies) != 0 {
  70. return false
  71. }
  72. if r.AllowList.FileAllowed(filepath.Base(filePath)) || r.AllowList.PathAllowed(filePath) {
  73. return false
  74. }
  75. return r.HasFileLeak(filepath.Base(filePath)) || r.HasFilePathLeak(filePath)
  76. }
  77. // HasFileLeak checks if there is a file leak
  78. func (r *Rule) HasFileLeak(fileName string) bool {
  79. return regexMatched(fileName, r.File)
  80. }
  81. // HasFilePathLeak checks if there is a path leak
  82. func (r *Rule) HasFilePathLeak(filePath string) bool {
  83. return regexMatched(filePath, r.Path)
  84. }
  85. // shannonEntropy calculates the entropy of data using the formula defined here:
  86. // https://en.wiktionary.org/wiki/Shannon_entropy
  87. // Another way to think about what this is doing is calculating the number of bits
  88. // needed to on average encode the data. So, the higher the entropy, the more random the data, the
  89. // more bits needed to encode that data.
  90. func shannonEntropy(data string) (entropy float64) {
  91. if data == "" {
  92. return 0
  93. }
  94. charCounts := make(map[rune]int)
  95. for _, char := range data {
  96. charCounts[char]++
  97. }
  98. invLength := 1.0 / float64(len(data))
  99. for _, count := range charCounts {
  100. freq := float64(count) * invLength
  101. entropy -= freq * math.Log2(freq)
  102. }
  103. return entropy
  104. }
  105. // regexMatched matched an interface to a regular expression. The interface f can
  106. // be a string type or go-git *object.File type.
  107. func regexMatched(f string, re *regexp.Regexp) bool {
  108. if re == nil {
  109. return false
  110. }
  111. if re.FindString(f) != "" {
  112. return true
  113. }
  114. return false
  115. }
  116. // anyRegexMatch matched an interface to a regular expression. The interface f can
  117. // be a string type or go-git *object.File type.
  118. func anyRegexMatch(f string, res []*regexp.Regexp) bool {
  119. for _, re := range res {
  120. if regexMatched(f, re) {
  121. return true
  122. }
  123. }
  124. return false
  125. }