rule.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. package scan
  2. import (
  3. "bufio"
  4. "fmt"
  5. "io"
  6. "math"
  7. "path/filepath"
  8. "regexp"
  9. "strconv"
  10. "strings"
  11. "time"
  12. "github.com/zricethezav/gitleaks/v6/config"
  13. "github.com/zricethezav/gitleaks/v6/manager"
  14. fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
  15. "github.com/go-git/go-git/v5/plumbing/object"
  16. log "github.com/sirupsen/logrus"
  17. )
  // Markers used when walking a textual patch to recover line numbers, plus the
  // sentinel line number reported when none can be determined.
  const (
  	// diffAddPrefix is the leading character of an added line in a patch; it is
  	// also the character that introduces the new-file range in a hunk header.
  	diffAddPrefix = "+"
  	// diffAddFilePrefix is the start of the new-file header line of a patch,
  	// without the trailing path separator.
  	diffAddFilePrefix = "+++ b"
  	// diffAddFilePrefixSlash is the new-file header prefix including the path
  	// separator; the file path follows it.
  	diffAddFilePrefixSlash = "+++ b/"
  	// diffLineSignature is the " @@" marker that closes the range section of a
  	// hunk header.
  	diffLineSignature = " @@"
  	// diffLineSignaturePrefix is the "@@ " marker that opens a hunk header.
  	diffLineSignaturePrefix = "@@ "
  	// defaultLineNumber is the line number stored on a leak before (or instead
  	// of) a real line number being found.
  	defaultLineNumber = -1
  )
  26. // CheckRules accepts bundle and checks each rule defined in the config against the bundle's content.
  27. func (repo *Repo) CheckRules(bundle *Bundle) {
  28. filename := filepath.Base(bundle.FilePath)
  29. path := filepath.Dir(bundle.FilePath)
  30. bundle.lineLookup = make(map[string]bool)
  31. // We want to check if there is a allowlist for this file
  32. if len(repo.config.Allowlist.Files) != 0 {
  33. for _, reFileName := range repo.config.Allowlist.Files {
  34. if RegexMatched(filename, reFileName) {
  35. log.Debugf("allowlisted file found, skipping scan of file: %s", filename)
  36. return
  37. }
  38. }
  39. }
  40. // We want to check if there is a allowlist for this path
  41. if len(repo.config.Allowlist.Paths) != 0 {
  42. for _, reFilePath := range repo.config.Allowlist.Paths {
  43. if RegexMatched(path, reFilePath) {
  44. log.Debugf("file in allowlisted path found, skipping scan of file: %s", filename)
  45. return
  46. }
  47. }
  48. }
  49. for _, rule := range repo.config.Rules {
  50. start := time.Now()
  51. // For each rule we want to check filename allowlists
  52. if isAllowListed(filename, rule.AllowList.Files) || isAllowListed(path, rule.AllowList.Paths) {
  53. continue
  54. }
  55. // If it has fileNameRegex and it doesnt match we continue to next rule
  56. if ruleContainFileRegex(rule) && !RegexMatched(filename, rule.File) {
  57. continue
  58. }
  59. // If it has filePathRegex and it doesnt match we continue to next rule
  60. if ruleContainPathRegex(rule) && !RegexMatched(path, rule.Path) {
  61. continue
  62. }
  63. // If it doesnt contain a Content regex then it is a filename regex match
  64. if !ruleContainRegex(rule) {
  65. repo.Manager.SendLeaks(manager.Leak{
  66. LineNumber: defaultLineNumber,
  67. Line: "N/A",
  68. Offender: "Filename/path offender: " + filename,
  69. Commit: bundle.Commit.Hash.String(),
  70. Repo: repo.Name,
  71. Message: bundle.Commit.Message,
  72. Rule: rule.Description,
  73. Author: bundle.Commit.Author.Name,
  74. Email: bundle.Commit.Author.Email,
  75. Date: bundle.Commit.Author.When,
  76. Tags: strings.Join(rule.Tags, ", "),
  77. File: filename,
  78. Operation: diffOpToString(bundle.Operation),
  79. })
  80. } else {
  81. //otherwise we check if it matches Content regex
  82. locs := rule.Regex.FindAllIndex([]byte(bundle.Content), -1)
  83. if len(locs) != 0 {
  84. for _, loc := range locs {
  85. start := loc[0]
  86. end := loc[1]
  87. for start != 0 && bundle.Content[start] != '\n' {
  88. start--
  89. }
  90. if bundle.Content[start] == '\n' {
  91. start++
  92. }
  93. for end < len(bundle.Content)-1 && bundle.Content[end] != '\n' {
  94. end++
  95. }
  96. line := bundle.Content[start:end]
  97. offender := bundle.Content[loc[0]:loc[1]]
  98. groups := rule.Regex.FindStringSubmatch(offender)
  99. if isAllowListed(line, append(rule.AllowList.Regexes, repo.config.Allowlist.Regexes...)) {
  100. continue
  101. }
  102. if len(rule.Entropies) != 0 && !trippedEntropy(groups, rule) {
  103. continue
  104. }
  105. // 0 is a match for the full regex pattern
  106. if 0 < rule.ReportGroup && rule.ReportGroup < len(groups) {
  107. offender = groups[rule.ReportGroup]
  108. }
  109. leak := manager.Leak{
  110. LineNumber: defaultLineNumber,
  111. Line: line,
  112. Offender: offender,
  113. Commit: bundle.Commit.Hash.String(),
  114. Repo: repo.Name,
  115. Message: bundle.Commit.Message,
  116. Rule: rule.Description,
  117. Author: bundle.Commit.Author.Name,
  118. Email: bundle.Commit.Author.Email,
  119. Date: bundle.Commit.Author.When,
  120. Tags: strings.Join(rule.Tags, ", "),
  121. File: bundle.FilePath,
  122. Operation: diffOpToString(bundle.Operation),
  123. }
  124. // only search for line numbers on non-deletions
  125. if bundle.Operation != fdiff.Delete {
  126. extractAndInjectLineNumber(&leak, bundle, repo)
  127. }
  128. repo.Manager.SendLeaks(leak)
  129. }
  130. }
  131. }
  132. repo.Manager.RecordTime(manager.RegexTime{
  133. Time: howLong(start),
  134. Regex: rule.Regex.String(),
  135. })
  136. }
  137. }
  138. // RegexMatched matched an interface to a regular expression. The interface f can
  139. // be a string type or go-git *object.File type.
  140. func RegexMatched(f interface{}, re *regexp.Regexp) bool {
  141. if re == nil {
  142. return false
  143. }
  144. switch f.(type) {
  145. case nil:
  146. return false
  147. case string:
  148. if re.FindString(f.(string)) != "" {
  149. return true
  150. }
  151. return false
  152. case *object.File:
  153. if re.FindString(f.(*object.File).Name) != "" {
  154. return true
  155. }
  156. return false
  157. }
  158. return false
  159. }
  160. // diffOpToString converts a fdiff.Operation to a string
  161. func diffOpToString(operation fdiff.Operation) string {
  162. switch operation {
  163. case fdiff.Add:
  164. return "addition"
  165. case fdiff.Equal:
  166. return "equal"
  167. default:
  168. return "deletion"
  169. }
  170. }
  171. // extractAndInjectLine accepts a leak, bundle, and repo which it uses to do a reverse search in order to extract
  172. // the line number of a historic or present leak. The function is only called when the git operation is an addition
  173. // or none, it does not get called when the git operation is deletion.
  174. func extractAndInjectLineNumber(leak *manager.Leak, bundle *Bundle, repo *Repo) {
  175. var err error
  176. switch bundle.scanType {
  177. case patchScan:
  178. if bundle.Patch == "" {
  179. return
  180. }
  181. // This is needed as some patches generate strings that are larger than
  182. // scanners max size (MaxScanTokenSize = 64 * 1024)
  183. // https://github.com/zricethezav/gitleaks/issues/413
  184. buf := make([]byte, len(bundle.Patch))
  185. scanner := bufio.NewScanner(strings.NewReader(bundle.Patch))
  186. scanner.Buffer(buf, len(bundle.Patch))
  187. scanner.Split(bufio.ScanLines)
  188. currFile := ""
  189. currLine := 0
  190. currStartDiffLine := 0
  191. for scanner.Scan() {
  192. txt := scanner.Text()
  193. if strings.HasPrefix(txt, diffAddFilePrefix) {
  194. currStartDiffLine = 1
  195. currLine = 0
  196. currFile = strings.Split(txt, diffAddFilePrefixSlash)[1]
  197. // next line contains diff line information so lets scan it here
  198. scanner.Scan()
  199. txt := scanner.Text()
  200. i := strings.Index(txt, diffAddPrefix)
  201. pairs := strings.Split(strings.Split(txt[i+1:], diffLineSignature)[0], ",")
  202. currStartDiffLine, err = strconv.Atoi(pairs[0])
  203. if err != nil {
  204. log.Debug(err)
  205. return
  206. }
  207. continue
  208. } else if strings.HasPrefix(txt, diffAddPrefix) && strings.Contains(txt, leak.Line) && leak.File == currFile {
  209. potentialLine := currLine + currStartDiffLine
  210. if _, ok := bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, potentialLine, currFile)]; !ok {
  211. bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, potentialLine, currFile)] = true
  212. leak.LineNumber = potentialLine
  213. return
  214. }
  215. } else if strings.HasPrefix(txt, diffLineSignaturePrefix) && currStartDiffLine != 0 {
  216. // This logic is used for when there are multiple leaks of the same offender within the same patch
  217. i := strings.Index(txt, diffAddPrefix)
  218. pairs := strings.Split(strings.Split(txt[i+1:], diffLineSignature)[0], ",")
  219. currStartDiffLine, err = strconv.Atoi(pairs[0])
  220. if err != nil {
  221. log.Debug(err)
  222. return
  223. }
  224. currLine = 0
  225. if !strings.HasSuffix(txt, diffLineSignature) {
  226. currLine = -1
  227. }
  228. }
  229. currLine++
  230. }
  231. case commitScan:
  232. if bundle.Commit == nil {
  233. return
  234. }
  235. f, err := bundle.Commit.File(bundle.FilePath)
  236. if err != nil {
  237. log.Error(err)
  238. return
  239. }
  240. r, err := f.Reader()
  241. if err != nil {
  242. log.Error(err)
  243. return
  244. }
  245. leak.LineNumber = extractLineHelper(r, bundle, leak)
  246. case uncommittedScan:
  247. wt, err := repo.Worktree()
  248. if err != nil {
  249. log.Error(err)
  250. return
  251. }
  252. f, err := wt.Filesystem.Open(leak.File)
  253. if err != nil {
  254. log.Error(err)
  255. return
  256. }
  257. leak.LineNumber = extractLineHelper(f, bundle, leak)
  258. }
  259. }
  260. // extractLineHelper consolidates code for checking the leak line against the contents of a reader to find the
  261. // line number of the leak.
  262. func extractLineHelper(r io.Reader, bundle *Bundle, leak *manager.Leak) int {
  263. scanner := bufio.NewScanner(r)
  264. lineNumber := 1
  265. for scanner.Scan() {
  266. if leak.Line == scanner.Text() {
  267. if _, ok := bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, lineNumber, bundle.FilePath)]; !ok {
  268. bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, lineNumber, bundle.FilePath)] = true
  269. return lineNumber
  270. }
  271. }
  272. lineNumber++
  273. }
  274. return -1
  275. }
  276. // trippedEntropy checks if a given capture group or offender falls in between entropy ranges
  277. // supplied by a custom gitleaks configuration. Gitleaks do not check entropy by default.
  278. func trippedEntropy(groups []string, rule config.Rule) bool {
  279. for _, e := range rule.Entropies {
  280. if len(groups) > e.Group {
  281. entropy := shannonEntropy(groups[e.Group])
  282. if entropy >= e.Min && entropy <= e.Max {
  283. return true
  284. }
  285. }
  286. }
  287. return false
  288. }
  // shannonEntropy calculates the entropy of data using the formula defined here:
  // https://en.wiktionary.org/wiki/Shannon_entropy
  // Another way to think about what this is doing is calculating the number of bits
  // needed to on average encode the data. So, the higher the entropy, the more random
  // the data, the more bits needed to encode that data.
  //
  // Symbols are Unicode code points: both the per-symbol counts and the total are
  // measured in runes, so the frequencies always sum to one. The empty string has
  // an entropy of 0.
  func shannonEntropy(data string) (entropy float64) {
  	if data == "" {
  		return 0
  	}

  	charCounts := make(map[rune]int)
  	total := 0
  	for _, char := range data {
  		charCounts[char]++
  		total++
  	}

  	// Divide by the number of runes counted above, not len(data): the byte
  	// length overstates the symbol count for multi-byte characters.
  	invLength := 1.0 / float64(total)
  	for _, count := range charCounts {
  		freq := float64(count) * invLength
  		entropy -= freq * math.Log2(freq)
  	}

  	return entropy
  }
  309. // Checks if the given rule has a regex
  310. func ruleContainRegex(rule config.Rule) bool {
  311. if rule.Regex == nil {
  312. return false
  313. }
  314. if rule.Regex.String() == "" {
  315. return false
  316. }
  317. return true
  318. }
  319. // Checks if the given rule has a file name regex
  320. func ruleContainFileRegex(rule config.Rule) bool {
  321. if rule.File == nil {
  322. return false
  323. }
  324. if rule.File.String() == "" {
  325. return false
  326. }
  327. return true
  328. }
  329. // Checks if the given rule has a file path regex
  330. func ruleContainPathRegex(rule config.Rule) bool {
  331. if rule.Path == nil {
  332. return false
  333. }
  334. if rule.Path.String() == "" {
  335. return false
  336. }
  337. return true
  338. }
  // isCommitAllowListed reports whether commitHash is exactly equal to one of the
  // hashes in allowlistedCommits.
  func isCommitAllowListed(commitHash string, allowlistedCommits []string) bool {
  	for i := range allowlistedCommits {
  		if allowlistedCommits[i] == commitHash {
  			return true
  		}
  	}
  	return false
  }
  // isAllowListed reports whether any regex in allowList finds a non-empty match
  // in target. An empty or nil allowList never matches.
  func isAllowListed(target string, allowList []*regexp.Regexp) bool {
  	for i := 0; i < len(allowList); i++ {
  		if match := allowList[i].FindString(target); match != "" {
  			return true
  		}
  	}
  	return false
  }
  356. }