rule.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. package config
  2. import (
  3. "math"
  4. "path/filepath"
  5. "regexp"
  6. )
  7. // Offender is a struct that contains the information matched when searching
  8. // content and information on why it matched (i.e. the EntropyLevel)
  9. type Offender struct {
  10. Match string
  11. EntropyLevel float64
  12. }
  13. // IsEmpty checks to see if nothing was found in the match
  14. func (o *Offender) IsEmpty() bool {
  15. return o.Match == ""
  16. }
  17. // ToString the contents of the match
  18. func (o *Offender) ToString() string {
  19. return o.Match
  20. }
// Rule is a struct that contains information that is loaded from a gitleaks config.
// This struct is used in the Config struct as an array of Rules and is iterated
// over during a scan. Each rule will be checked. If a regex match is found AND
// that match is not allowlisted (globally or locally), then a leak will be appended
// to the final scan report.
type Rule struct {
	// Description is descriptive text for the rule; the methods shown here do
	// not read it.
	Description string
	// Regex is applied to each line by Inspect to find a match.
	Regex *regexp.Regexp
	// File is matched against the base name of a file path (see HasFileLeak).
	File *regexp.Regexp
	// Path is matched against the file path (see HasFilePathLeak).
	Path *regexp.Regexp
	// ReportGroup is the index of the Regex capture group that Inspect reports
	// instead of the whole match. Values that are not greater than 0, or that
	// exceed the number of groups, leave the whole match in place.
	ReportGroup int
	// Tags are labels attached to the rule; the methods shown here do not
	// read them.
	Tags []string
	// AllowList is the rule-level allowlist consulted by RegexAllowed,
	// CommitAllowed and HasFileOrPathLeakOnly.
	AllowList AllowList
	// Entropies are the entropy ranges checked by CheckEntropy. When empty,
	// entropy is not checked.
	Entropies []Entropy
}
  36. // Inspect checks the content of a line for a leak
  37. func (r *Rule) Inspect(line string) *Offender {
  38. match := r.Regex.FindString(line)
  39. // EntropyLevel -1 means not checked
  40. if match == "" {
  41. return &Offender{
  42. Match: "",
  43. EntropyLevel: -1,
  44. }
  45. }
  46. // check if offender is allowed
  47. // EntropyLevel -1 means not checked
  48. if r.RegexAllowed(line) {
  49. return &Offender{
  50. Match: "",
  51. EntropyLevel: -1,
  52. }
  53. }
  54. // check entropy
  55. groups := r.Regex.FindStringSubmatch(match)
  56. entropyWithinRange, entropyLevel := r.CheckEntropy(groups)
  57. if len(r.Entropies) != 0 && !entropyWithinRange {
  58. return &Offender{
  59. Match: "",
  60. EntropyLevel: entropyLevel,
  61. }
  62. }
  63. // 0 is a match for the full regex pattern
  64. if 0 < r.ReportGroup && r.ReportGroup < len(groups) {
  65. match = groups[r.ReportGroup]
  66. }
  67. return &Offender{
  68. Match: match,
  69. EntropyLevel: entropyLevel,
  70. }
  71. }
// RegexAllowed reports whether content is allowlisted for this rule, i.e.
// whether anyRegexMatch finds a match for content among the regular
// expressions in r.AllowList.Regexes.
func (r *Rule) RegexAllowed(content string) bool {
	return anyRegexMatch(content, r.AllowList.Regexes)
}
// CommitAllowed reports whether a commit is allowlisted for this rule. The
// decision is delegated entirely to r.AllowList.CommitAllowed.
func (r *Rule) CommitAllowed(commit string) bool {
	return r.AllowList.CommitAllowed(commit)
}
  80. // CheckEntropy checks if there is an entropy leak
  81. func (r *Rule) CheckEntropy(groups []string) (bool, float64) {
  82. var highestFound float64 = 0
  83. for _, e := range r.Entropies {
  84. if len(groups) > e.Group {
  85. entropy := shannonEntropy(groups[e.Group])
  86. if entropy >= e.Min && entropy <= e.Max {
  87. return true, entropy
  88. } else if entropy > highestFound {
  89. highestFound = entropy
  90. }
  91. }
  92. }
  93. if len(r.Entropies) == 0 {
  94. // entropies not checked
  95. return false, -1
  96. }
  97. // entropies checked but not within the range
  98. return false, highestFound
  99. }
  100. // HasFileOrPathLeakOnly first checks if there are no entropy/regex rules, then checks if
  101. // there are any file/path leaks
  102. func (r *Rule) HasFileOrPathLeakOnly(filePath string) bool {
  103. if r.Regex.String() != "" {
  104. return false
  105. }
  106. if len(r.Entropies) != 0 {
  107. return false
  108. }
  109. if r.AllowList.FileAllowed(filepath.Base(filePath)) || r.AllowList.PathAllowed(filePath) {
  110. return false
  111. }
  112. return r.HasFileLeak(filepath.Base(filePath)) || r.HasFilePathLeak(filePath)
  113. }
// HasFileLeak reports whether fileName matches the rule's File regular
// expression, as decided by regexMatched (a nil File never matches).
func (r *Rule) HasFileLeak(fileName string) bool {
	return regexMatched(fileName, r.File)
}
// HasFilePathLeak reports whether filePath matches the rule's Path regular
// expression, as decided by regexMatched (a nil Path never matches).
func (r *Rule) HasFilePathLeak(filePath string) bool {
	return regexMatched(filePath, r.Path)
}
  122. // shannonEntropy calculates the entropy of data using the formula defined here:
  123. // https://en.wiktionary.org/wiki/Shannon_entropy
  124. // Another way to think about what this is doing is calculating the number of bits
  125. // needed to on average encode the data. So, the higher the entropy, the more random the data, the
  126. // more bits needed to encode that data.
  127. func shannonEntropy(data string) (entropy float64) {
  128. if data == "" {
  129. return 0
  130. }
  131. charCounts := make(map[rune]int)
  132. for _, char := range data {
  133. charCounts[char]++
  134. }
  135. invLength := 1.0 / float64(len(data))
  136. for _, count := range charCounts {
  137. freq := float64(count) * invLength
  138. entropy -= freq * math.Log2(freq)
  139. }
  140. return entropy
  141. }
  142. // regexMatched matched an interface to a regular expression. The interface f can
  143. // be a string type or go-git *object.File type.
  144. func regexMatched(f string, re *regexp.Regexp) bool {
  145. if re == nil {
  146. return false
  147. }
  148. if re.FindString(f) != "" {
  149. return true
  150. }
  151. return false
  152. }
  153. // anyRegexMatch matched an interface to a regular expression. The interface f can
  154. // be a string type or go-git *object.File type.
  155. func anyRegexMatch(f string, res []*regexp.Regexp) bool {
  156. for _, re := range res {
  157. if regexMatched(f, re) {
  158. return true
  159. }
  160. }
  161. return false
  162. }