rule.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. package scan
  2. import (
  3. "bufio"
  4. "fmt"
  5. "io"
  6. "math"
  7. "path/filepath"
  8. "regexp"
  9. "strconv"
  10. "strings"
  11. "time"
  12. "github.com/zricethezav/gitleaks/v5/config"
  13. "github.com/zricethezav/gitleaks/v5/manager"
  14. fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
  15. "github.com/go-git/go-git/v5/plumbing/object"
  16. log "github.com/sirupsen/logrus"
  17. )
  // Markers and defaults used while recovering the line number of a leak.
  const (
  	// defaultLineNumber is the line number a leak carries until a real one
  	// has been found.
  	defaultLineNumber = -1

  	// diffAddPrefix is the leading character of an added line in a patch; it
  	// is also used to locate the "+start,count" part of a hunk header.
  	diffAddPrefix = "+"

  	// diffAddFilePrefix and diffAddFilePrefixSlash identify the "+++ b/<path>"
  	// header line of a patch; the slash variant is what precedes the path.
  	diffAddFilePrefix      = "+++ b"
  	diffAddFilePrefixSlash = "+++ b/"

  	// diffLineSignature is the text that follows the "+start,count" part of
  	// a hunk header.
  	diffLineSignature = " @@"
  )
  25. // CheckRules accepts bundle and checks each rule defined in the config against the bundle's content.
  26. func (repo *Repo) CheckRules(bundle *Bundle) {
  27. filename := filepath.Base(bundle.FilePath)
  28. path := filepath.Dir(bundle.FilePath)
  29. bundle.lineLookup = make(map[string]bool)
  30. // We want to check if there is a allowlist for this file
  31. if len(repo.config.Allowlist.Files) != 0 {
  32. for _, reFileName := range repo.config.Allowlist.Files {
  33. if RegexMatched(filename, reFileName) {
  34. log.Debugf("allowlisted file found, skipping scan of file: %s", filename)
  35. return
  36. }
  37. }
  38. }
  39. // We want to check if there is a allowlist for this path
  40. if len(repo.config.Allowlist.Paths) != 0 {
  41. for _, reFilePath := range repo.config.Allowlist.Paths {
  42. if RegexMatched(path, reFilePath) {
  43. log.Debugf("file in allowlisted path found, skipping scan of file: %s", filename)
  44. return
  45. }
  46. }
  47. }
  48. for _, rule := range repo.config.Rules {
  49. start := time.Now()
  50. // For each rule we want to check filename allowlists
  51. if isFileNameWhiteListed(filename, rule.Allowlist) || isFilePathWhiteListed(path, rule.Allowlist) {
  52. continue
  53. }
  54. // If it has fileNameRegex and it doesnt match we continue to next rule
  55. if ruleContainFileNameRegex(rule) && !RegexMatched(filename, rule.FileNameRegex) {
  56. continue
  57. }
  58. // If it has filePathRegex and it doesnt match we continue to next rule
  59. if ruleContainFilePathRegex(rule) && !RegexMatched(path, rule.FilePathRegex) {
  60. continue
  61. }
  62. // If it doesnt contain a Content regex then it is a filename regex match
  63. if !ruleContainRegex(rule) {
  64. repo.Manager.SendLeaks(manager.Leak{
  65. LineNumber: defaultLineNumber,
  66. Line: "N/A",
  67. Offender: "Filename/path offender: " + filename,
  68. Commit: bundle.Commit.Hash.String(),
  69. Repo: repo.Name,
  70. Message: bundle.Commit.Message,
  71. Rule: rule.Description,
  72. Author: bundle.Commit.Author.Name,
  73. Email: bundle.Commit.Author.Email,
  74. Date: bundle.Commit.Author.When,
  75. Tags: strings.Join(rule.Tags, ", "),
  76. File: filename,
  77. Operation: diffOpToString(bundle.Operation),
  78. })
  79. } else {
  80. //otherwise we check if it matches Content regex
  81. locs := rule.Regex.FindAllIndex([]byte(bundle.Content), -1)
  82. if len(locs) != 0 {
  83. for _, loc := range locs {
  84. start := loc[0]
  85. end := loc[1]
  86. for start != 0 && bundle.Content[start] != '\n' {
  87. start--
  88. }
  89. if bundle.Content[start] == '\n' {
  90. start++
  91. }
  92. for end < len(bundle.Content)-1 && bundle.Content[end] != '\n' {
  93. end++
  94. }
  95. line := bundle.Content[start:end]
  96. offender := bundle.Content[loc[0]:loc[1]]
  97. groups := rule.Regex.FindStringSubmatch(offender)
  98. if isOffenderWhiteListed(offender, rule.Allowlist) {
  99. continue
  100. }
  101. if len(rule.Entropies) != 0 && !trippedEntropy(groups, rule) {
  102. continue
  103. }
  104. leak := manager.Leak{
  105. LineNumber: defaultLineNumber,
  106. Line: line,
  107. Offender: offender,
  108. Commit: bundle.Commit.Hash.String(),
  109. Repo: repo.Name,
  110. Message: bundle.Commit.Message,
  111. Rule: rule.Description,
  112. Author: bundle.Commit.Author.Name,
  113. Email: bundle.Commit.Author.Email,
  114. Date: bundle.Commit.Author.When,
  115. Tags: strings.Join(rule.Tags, ", "),
  116. File: bundle.FilePath,
  117. Operation: diffOpToString(bundle.Operation),
  118. }
  119. // only search for line numbers on non-deletions
  120. if bundle.Operation != fdiff.Delete {
  121. extractAndInjectLineNumber(&leak, bundle, repo)
  122. }
  123. repo.Manager.SendLeaks(leak)
  124. }
  125. }
  126. }
  127. // TODO should return filenameRegex if only file rule
  128. repo.Manager.RecordTime(manager.RegexTime{
  129. Time: howLong(start),
  130. Regex: rule.Regex.String(),
  131. })
  132. }
  133. }
  134. // RegexMatched matched an interface to a regular expression. The interface f can
  135. // be a string type or go-git *object.File type.
  136. func RegexMatched(f interface{}, re *regexp.Regexp) bool {
  137. if re == nil {
  138. return false
  139. }
  140. switch f.(type) {
  141. case nil:
  142. return false
  143. case string:
  144. if re.FindString(f.(string)) != "" {
  145. return true
  146. }
  147. return false
  148. case *object.File:
  149. if re.FindString(f.(*object.File).Name) != "" {
  150. return true
  151. }
  152. return false
  153. }
  154. return false
  155. }
  156. // diffOpToString converts a fdiff.Operation to a string
  157. func diffOpToString(operation fdiff.Operation) string {
  158. switch operation {
  159. case fdiff.Add:
  160. return "addition"
  161. case fdiff.Equal:
  162. return "equal"
  163. default:
  164. return "deletion"
  165. }
  166. }
  167. // extractAndInjectLine accepts a leak, bundle, and repo which it uses to do a reverse search in order to extract
  168. // the line number of a historic or present leak. The function is only called when the git operation is an addition
  169. // or none, it does not get called when the git operation is deletion.
  170. func extractAndInjectLineNumber(leak *manager.Leak, bundle *Bundle, repo *Repo) {
  171. var err error
  172. switch bundle.scanType {
  173. case patchScan:
  174. if bundle.Patch == "" {
  175. return
  176. }
  177. scanner := bufio.NewScanner(strings.NewReader(bundle.Patch))
  178. currFile := ""
  179. currLine := 0
  180. currStartDiffLine := 0
  181. for scanner.Scan() {
  182. txt := scanner.Text()
  183. if strings.HasPrefix(txt, diffAddFilePrefix) {
  184. currStartDiffLine = 1
  185. currLine = 0
  186. currFile = strings.Split(txt, diffAddFilePrefixSlash)[1]
  187. // next line contains diff line information so lets scan it here
  188. scanner.Scan()
  189. txt := scanner.Text()
  190. i := strings.Index(txt, diffAddPrefix)
  191. pairs := strings.Split(strings.Split(txt[i+1:], diffLineSignature)[0], ",")
  192. currStartDiffLine, err = strconv.Atoi(pairs[0])
  193. if err != nil {
  194. log.Debug(err)
  195. return
  196. }
  197. continue
  198. } else if strings.HasPrefix(txt, diffAddPrefix) && strings.Contains(txt, leak.Line) && leak.File == currFile {
  199. potentialLine := currLine + currStartDiffLine
  200. if _, ok := bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, potentialLine, currFile)]; !ok {
  201. bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, potentialLine, currFile)] = true
  202. leak.LineNumber = potentialLine
  203. return
  204. }
  205. }
  206. currLine++
  207. }
  208. case commitScan:
  209. if bundle.Commit == nil {
  210. return
  211. }
  212. f, err := bundle.Commit.File(bundle.FilePath)
  213. if err != nil {
  214. log.Error(err)
  215. return
  216. }
  217. r, err := f.Reader()
  218. if err != nil {
  219. log.Error(err)
  220. return
  221. }
  222. leak.LineNumber = extractLineHelper(r, bundle, leak)
  223. case uncommittedScan:
  224. wt, err := repo.Worktree()
  225. if err != nil {
  226. log.Error(err)
  227. return
  228. }
  229. f, err := wt.Filesystem.Open(leak.File)
  230. if err != nil {
  231. log.Error(err)
  232. return
  233. }
  234. leak.LineNumber = extractLineHelper(f, bundle, leak)
  235. }
  236. }
  237. // extractLineHelper consolidates code for checking the leak line against the contents of a reader to find the
  238. // line number of the leak.
  239. func extractLineHelper(r io.Reader, bundle *Bundle, leak *manager.Leak) int {
  240. scanner := bufio.NewScanner(r)
  241. lineNumber := 1
  242. for scanner.Scan() {
  243. if leak.Line == scanner.Text() {
  244. if _, ok := bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, lineNumber, bundle.FilePath)]; !ok {
  245. bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, lineNumber, bundle.FilePath)] = true
  246. return lineNumber
  247. }
  248. }
  249. lineNumber++
  250. }
  251. return -1
  252. }
  253. // trippedEntropy checks if a given capture group or offender falls in between entropy ranges
  254. // supplied by a custom gitleaks configuration. Gitleaks do not check entropy by default.
  255. func trippedEntropy(groups []string, rule config.Rule) bool {
  256. for _, e := range rule.Entropies {
  257. if len(groups) > e.Group {
  258. entropy := shannonEntropy(groups[e.Group])
  259. if entropy >= e.Min && entropy <= e.Max {
  260. return true
  261. }
  262. }
  263. }
  264. return false
  265. }
  // shannonEntropy calculates the entropy of data using the formula defined here:
  // https://en.wiktionary.org/wiki/Shannon_entropy
  // Another way to think about what this is doing is calculating the number of bits
  // needed to on average encode the data. So, the higher the entropy, the more random the data, the
  // more bits needed to encode that data.
  //
  // Symbols are the runes of data, and each frequency is relative to the number
  // of runes (not bytes), so multi-byte characters are weighted correctly. The
  // empty string has entropy 0.
  func shannonEntropy(data string) (entropy float64) {
  	if data == "" {
  		return 0
  	}

  	charCounts := make(map[rune]int)
  	total := 0
  	for _, char := range data {
  		charCounts[char]++
  		total++
  	}

  	// Divide by the rune count: len(data) is a byte count and would make the
  	// frequencies sum to less than 1 for non-ASCII input.
  	invLength := 1.0 / float64(total)
  	for _, count := range charCounts {
  		freq := float64(count) * invLength
  		entropy -= freq * math.Log2(freq)
  	}
  	return entropy
  }
  286. // Checks if the given rule has a regex
  287. func ruleContainRegex(rule config.Rule) bool {
  288. if rule.Regex == nil {
  289. return false
  290. }
  291. if rule.Regex.String() == "" {
  292. return false
  293. }
  294. return true
  295. }
  296. // Checks if the given rule has a file name regex
  297. func ruleContainFileNameRegex(rule config.Rule) bool {
  298. if rule.FileNameRegex == nil {
  299. return false
  300. }
  301. if rule.FileNameRegex.String() == "" {
  302. return false
  303. }
  304. return true
  305. }
  306. // Checks if the given rule has a file path regex
  307. func ruleContainFilePathRegex(rule config.Rule) bool {
  308. if rule.FilePathRegex == nil {
  309. return false
  310. }
  311. if rule.FilePathRegex.String() == "" {
  312. return false
  313. }
  314. return true
  315. }
  // isCommitWhiteListed reports whether commitHash is exactly equal to one of
  // the entries in allowlistedCommits.
  func isCommitWhiteListed(commitHash string, allowlistedCommits []string) bool {
  	for i := range allowlistedCommits {
  		if allowlistedCommits[i] == commitHash {
  			return true
  		}
  	}
  	return false
  }
  324. func isOffenderWhiteListed(offender string, allowlist []config.Allowlist) bool {
  325. if len(allowlist) != 0 {
  326. for _, wl := range allowlist {
  327. if wl.Regex.FindString(offender) != "" {
  328. return true
  329. }
  330. }
  331. }
  332. return false
  333. }
  334. func isFileNameWhiteListed(filename string, allowlist []config.Allowlist) bool {
  335. if len(allowlist) != 0 {
  336. for _, wl := range allowlist {
  337. if RegexMatched(filename, wl.File) {
  338. return true
  339. }
  340. }
  341. }
  342. return false
  343. }
  344. func isFilePathWhiteListed(filepath string, allowlist []config.Allowlist) bool {
  345. if len(allowlist) != 0 {
  346. for _, wl := range allowlist {
  347. if RegexMatched(filepath, wl.Path) {
  348. return true
  349. }
  350. }
  351. }
  352. return false
  353. }