rule.go 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. package scan
  2. import (
  3. "bufio"
  4. "fmt"
  5. "io"
  6. "math"
  7. "path/filepath"
  8. "regexp"
  9. "strconv"
  10. "strings"
  11. "time"
  12. "github.com/zricethezav/gitleaks/v6/config"
  13. "github.com/zricethezav/gitleaks/v6/manager"
  14. fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
  15. "github.com/go-git/go-git/v5/plumbing/object"
  16. log "github.com/sirupsen/logrus"
  17. )
  18. const (
  19. diffAddPrefix = "+"
  20. diffAddFilePrefix = "+++ b"
  21. diffAddFilePrefixSlash = "+++ b/"
  22. diffLineSignature = " @@"
  23. defaultLineNumber = -1
  24. )
  25. // CheckRules accepts bundle and checks each rule defined in the config against the bundle's content.
  26. func (repo *Repo) CheckRules(bundle *Bundle) {
  27. filename := filepath.Base(bundle.FilePath)
  28. path := filepath.Dir(bundle.FilePath)
  29. bundle.lineLookup = make(map[string]bool)
  30. // We want to check if there is a allowlist for this file
  31. if len(repo.config.Allowlist.Files) != 0 {
  32. for _, reFileName := range repo.config.Allowlist.Files {
  33. if RegexMatched(filename, reFileName) {
  34. log.Debugf("allowlisted file found, skipping scan of file: %s", filename)
  35. return
  36. }
  37. }
  38. }
  39. // We want to check if there is a allowlist for this path
  40. if len(repo.config.Allowlist.Paths) != 0 {
  41. for _, reFilePath := range repo.config.Allowlist.Paths {
  42. if RegexMatched(path, reFilePath) {
  43. log.Debugf("file in allowlisted path found, skipping scan of file: %s", filename)
  44. return
  45. }
  46. }
  47. }
  48. for _, rule := range repo.config.Rules {
  49. start := time.Now()
  50. // For each rule we want to check filename allowlists
  51. if isAllowListed(filename, rule.Allowlist.Files) || isAllowListed(path, rule.Allowlist.Paths) {
  52. continue
  53. }
  54. // If it has fileNameRegex and it doesnt match we continue to next rule
  55. if ruleContainFileRegex(rule) && !RegexMatched(filename, rule.File) {
  56. continue
  57. }
  58. // If it has filePathRegex and it doesnt match we continue to next rule
  59. if ruleContainPathRegex(rule) && !RegexMatched(path, rule.Path) {
  60. continue
  61. }
  62. // If it doesnt contain a Content regex then it is a filename regex match
  63. if !ruleContainRegex(rule) {
  64. repo.Manager.SendLeaks(manager.Leak{
  65. LineNumber: defaultLineNumber,
  66. Line: "N/A",
  67. Offender: "Filename/path offender: " + filename,
  68. Commit: bundle.Commit.Hash.String(),
  69. Repo: repo.Name,
  70. Message: bundle.Commit.Message,
  71. Rule: rule.Description,
  72. Author: bundle.Commit.Author.Name,
  73. Email: bundle.Commit.Author.Email,
  74. Date: bundle.Commit.Author.When,
  75. Tags: strings.Join(rule.Tags, ", "),
  76. File: filename,
  77. Operation: diffOpToString(bundle.Operation),
  78. })
  79. } else {
  80. //otherwise we check if it matches Content regex
  81. locs := rule.Regex.FindAllIndex([]byte(bundle.Content), -1)
  82. if len(locs) != 0 {
  83. for _, loc := range locs {
  84. start := loc[0]
  85. end := loc[1]
  86. for start != 0 && bundle.Content[start] != '\n' {
  87. start--
  88. }
  89. if bundle.Content[start] == '\n' {
  90. start++
  91. }
  92. for end < len(bundle.Content)-1 && bundle.Content[end] != '\n' {
  93. end++
  94. }
  95. line := bundle.Content[start:end]
  96. offender := bundle.Content[loc[0]:loc[1]]
  97. groups := rule.Regex.FindStringSubmatch(offender)
  98. if isAllowListed(line, append(rule.Allowlist.Regexes, repo.config.Allowlist.Regexes...)) {
  99. continue
  100. }
  101. if len(rule.Entropies) != 0 && !trippedEntropy(groups, rule) {
  102. continue
  103. }
  104. leak := manager.Leak{
  105. LineNumber: defaultLineNumber,
  106. Line: line,
  107. Offender: offender,
  108. Commit: bundle.Commit.Hash.String(),
  109. Repo: repo.Name,
  110. Message: bundle.Commit.Message,
  111. Rule: rule.Description,
  112. Author: bundle.Commit.Author.Name,
  113. Email: bundle.Commit.Author.Email,
  114. Date: bundle.Commit.Author.When,
  115. Tags: strings.Join(rule.Tags, ", "),
  116. File: bundle.FilePath,
  117. Operation: diffOpToString(bundle.Operation),
  118. }
  119. // only search for line numbers on non-deletions
  120. if bundle.Operation != fdiff.Delete {
  121. extractAndInjectLineNumber(&leak, bundle, repo)
  122. }
  123. repo.Manager.SendLeaks(leak)
  124. }
  125. }
  126. }
  127. repo.Manager.RecordTime(manager.RegexTime{
  128. Time: howLong(start),
  129. Regex: rule.Regex.String(),
  130. })
  131. }
  132. }
  133. // RegexMatched matched an interface to a regular expression. The interface f can
  134. // be a string type or go-git *object.File type.
  135. func RegexMatched(f interface{}, re *regexp.Regexp) bool {
  136. if re == nil {
  137. return false
  138. }
  139. switch f.(type) {
  140. case nil:
  141. return false
  142. case string:
  143. if re.FindString(f.(string)) != "" {
  144. return true
  145. }
  146. return false
  147. case *object.File:
  148. if re.FindString(f.(*object.File).Name) != "" {
  149. return true
  150. }
  151. return false
  152. }
  153. return false
  154. }
  155. // diffOpToString converts a fdiff.Operation to a string
  156. func diffOpToString(operation fdiff.Operation) string {
  157. switch operation {
  158. case fdiff.Add:
  159. return "addition"
  160. case fdiff.Equal:
  161. return "equal"
  162. default:
  163. return "deletion"
  164. }
  165. }
  166. // extractAndInjectLine accepts a leak, bundle, and repo which it uses to do a reverse search in order to extract
  167. // the line number of a historic or present leak. The function is only called when the git operation is an addition
  168. // or none, it does not get called when the git operation is deletion.
  169. func extractAndInjectLineNumber(leak *manager.Leak, bundle *Bundle, repo *Repo) {
  170. var err error
  171. switch bundle.scanType {
  172. case patchScan:
  173. if bundle.Patch == "" {
  174. return
  175. }
  176. // This is needed as some patches generate strings that are larger than
  177. // scanners max size (MaxScanTokenSize = 64 * 1024)
  178. // https://github.com/zricethezav/gitleaks/issues/413
  179. buf := make([]byte, len(bundle.Patch))
  180. scanner := bufio.NewScanner(strings.NewReader(bundle.Patch))
  181. scanner.Buffer(buf, len(bundle.Patch))
  182. scanner.Split(bufio.ScanLines)
  183. currFile := ""
  184. currLine := 0
  185. currStartDiffLine := 0
  186. for scanner.Scan() {
  187. txt := scanner.Text()
  188. if strings.HasPrefix(txt, diffAddFilePrefix) {
  189. currStartDiffLine = 1
  190. currLine = 0
  191. currFile = strings.Split(txt, diffAddFilePrefixSlash)[1]
  192. // next line contains diff line information so lets scan it here
  193. scanner.Scan()
  194. txt := scanner.Text()
  195. i := strings.Index(txt, diffAddPrefix)
  196. pairs := strings.Split(strings.Split(txt[i+1:], diffLineSignature)[0], ",")
  197. currStartDiffLine, err = strconv.Atoi(pairs[0])
  198. if err != nil {
  199. log.Debug(err)
  200. return
  201. }
  202. continue
  203. } else if strings.HasPrefix(txt, diffAddPrefix) && strings.Contains(txt, leak.Line) && leak.File == currFile {
  204. potentialLine := currLine + currStartDiffLine
  205. if _, ok := bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, potentialLine, currFile)]; !ok {
  206. bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, potentialLine, currFile)] = true
  207. leak.LineNumber = potentialLine
  208. return
  209. }
  210. }
  211. currLine++
  212. }
  213. case commitScan:
  214. if bundle.Commit == nil {
  215. return
  216. }
  217. f, err := bundle.Commit.File(bundle.FilePath)
  218. if err != nil {
  219. log.Error(err)
  220. return
  221. }
  222. r, err := f.Reader()
  223. if err != nil {
  224. log.Error(err)
  225. return
  226. }
  227. leak.LineNumber = extractLineHelper(r, bundle, leak)
  228. case uncommittedScan:
  229. wt, err := repo.Worktree()
  230. if err != nil {
  231. log.Error(err)
  232. return
  233. }
  234. f, err := wt.Filesystem.Open(leak.File)
  235. if err != nil {
  236. log.Error(err)
  237. return
  238. }
  239. leak.LineNumber = extractLineHelper(f, bundle, leak)
  240. }
  241. }
  242. // extractLineHelper consolidates code for checking the leak line against the contents of a reader to find the
  243. // line number of the leak.
  244. func extractLineHelper(r io.Reader, bundle *Bundle, leak *manager.Leak) int {
  245. scanner := bufio.NewScanner(r)
  246. lineNumber := 1
  247. for scanner.Scan() {
  248. if leak.Line == scanner.Text() {
  249. if _, ok := bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, lineNumber, bundle.FilePath)]; !ok {
  250. bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, lineNumber, bundle.FilePath)] = true
  251. return lineNumber
  252. }
  253. }
  254. lineNumber++
  255. }
  256. return -1
  257. }
  258. // trippedEntropy checks if a given capture group or offender falls in between entropy ranges
  259. // supplied by a custom gitleaks configuration. Gitleaks do not check entropy by default.
  260. func trippedEntropy(groups []string, rule config.Rule) bool {
  261. for _, e := range rule.Entropies {
  262. if len(groups) > e.Group {
  263. entropy := shannonEntropy(groups[e.Group])
  264. if entropy >= e.Min && entropy <= e.Max {
  265. return true
  266. }
  267. }
  268. }
  269. return false
  270. }
  271. // shannonEntropy calculates the entropy of data using the formula defined here:
  272. // https://en.wiktionary.org/wiki/Shannon_entropy
  273. // Another way to think about what this is doing is calculating the number of bits
  274. // needed to on average encode the data. So, the higher the entropy, the more random the data, the
  275. // more bits needed to encode that data.
  276. func shannonEntropy(data string) (entropy float64) {
  277. if data == "" {
  278. return 0
  279. }
  280. charCounts := make(map[rune]int)
  281. for _, char := range data {
  282. charCounts[char]++
  283. }
  284. invLength := 1.0 / float64(len(data))
  285. for _, count := range charCounts {
  286. freq := float64(count) * invLength
  287. entropy -= freq * math.Log2(freq)
  288. }
  289. return entropy
  290. }
  291. // Checks if the given rule has a regex
  292. func ruleContainRegex(rule config.Rule) bool {
  293. if rule.Regex == nil {
  294. return false
  295. }
  296. if rule.Regex.String() == "" {
  297. return false
  298. }
  299. return true
  300. }
  301. // Checks if the given rule has a file name regex
  302. func ruleContainFileRegex(rule config.Rule) bool {
  303. if rule.File == nil {
  304. return false
  305. }
  306. if rule.File.String() == "" {
  307. return false
  308. }
  309. return true
  310. }
  311. // Checks if the given rule has a file path regex
  312. func ruleContainPathRegex(rule config.Rule) bool {
  313. if rule.Path == nil {
  314. return false
  315. }
  316. if rule.Path.String() == "" {
  317. return false
  318. }
  319. return true
  320. }
  321. func isCommitAllowListed(commitHash string, allowlistedCommits []string) bool {
  322. for _, hash := range allowlistedCommits {
  323. if commitHash == hash {
  324. return true
  325. }
  326. }
  327. return false
  328. }
  329. func isAllowListed(target string, allowList []*regexp.Regexp) bool {
  330. if len(allowList) != 0 {
  331. for _, re := range allowList {
  332. if re.FindString(target) != "" {
  333. return true
  334. }
  335. }
  336. }
  337. return false
  338. }