util.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. package audit
  2. import (
  3. "fmt"
  4. "math"
  5. "path/filepath"
  6. "regexp"
  7. "runtime"
  8. "strings"
  9. "time"
  10. "github.com/zricethezav/gitleaks/v4/config"
  11. "github.com/zricethezav/gitleaks/v4/manager"
  12. log "github.com/sirupsen/logrus"
  13. "gopkg.in/src-d/go-git.v4"
  14. "gopkg.in/src-d/go-git.v4/plumbing"
  15. fdiff "gopkg.in/src-d/go-git.v4/plumbing/format/diff"
  16. "gopkg.in/src-d/go-git.v4/plumbing/object"
  17. )
  18. // Inspect patch accepts a patch, commit, and repo. If the patches contains files that are
  19. // binary, then gitleaks will skip auditing that file OR if a file is matched on
  20. // whitelisted files set in the configuration. If a global rule for files is defined and a filename
  21. // matches said global rule, then a leak is sent to the manager.
  22. // After that, file chunks are created which are then inspected by InspectString()
  23. func inspectPatch(patch *object.Patch, c *object.Commit, repo *Repo) {
  24. for _, f := range patch.FilePatches() {
  25. if repo.timeoutReached() {
  26. return
  27. }
  28. if f.IsBinary() {
  29. continue
  30. }
  31. for _, chunk := range f.Chunks() {
  32. if chunk.Type() == fdiff.Delete || chunk.Type() == fdiff.Add {
  33. InspectFile(chunk.Content(), getFileFullPath(f), c, repo)
  34. }
  35. }
  36. }
  37. }
  38. // getFileName accepts a file patch and returns the filename
  39. func getFileFullPath(f fdiff.FilePatch) string {
  40. fn := "???"
  41. from, to := f.Files()
  42. if from != nil {
  43. return from.Path()
  44. } else if to != nil {
  45. return to.Path()
  46. }
  47. return fn
  48. }
  // getFilePath takes the full path of a file and returns only its directory
  // part, as computed by filepath.Dir.
  func getFilePath(fullpath string) string {
  	dir := filepath.Dir(fullpath)
  	return dir
  }
  // getFileName takes the full path of a file and returns only its last
  // element (the file name), as computed by filepath.Base.
  func getFileName(fullpath string) string {
  	base := filepath.Base(fullpath)
  	return base
  }
  57. // aws_access_key_id='AKIAIO5FODNN7EXAMPLE',
  58. // trippedEntropy checks if a given capture group or offender falls in between entropy ranges
  59. // supplied by a custom gitleaks configuration. Gitleaks do not check entropy by default.
  60. func trippedEntropy(groups []string, rule config.Rule) bool {
  61. for _, e := range rule.Entropies {
  62. if len(groups) > e.Group {
  63. entropy := shannonEntropy(groups[e.Group])
  64. if entropy >= e.Min && entropy <= e.Max {
  65. return true
  66. }
  67. }
  68. }
  69. return false
  70. }
  // shannonEntropy returns the Shannon entropy of data in bits per symbol, where
  // every rune of the string counts as one symbol
  // (https://en.wiktionary.org/wiki/Shannon_entropy). The empty string has an
  // entropy of 0.
  func shannonEntropy(data string) (entropy float64) {
  	if data == "" {
  		return 0
  	}
  	charCounts := make(map[rune]int)
  	total := 0
  	for _, char := range data {
  		charCounts[char]++
  		total++
  	}
  	// Normalise by the number of runes that were counted, not by len(data):
  	// len is a byte count, so multi-byte characters would otherwise make the
  	// frequencies sum to less than 1 and skew the result.
  	invLength := 1.0 / float64(total)
  	for _, count := range charCounts {
  		freq := float64(count) * invLength
  		entropy -= freq * math.Log2(freq)
  	}
  	return entropy
  }
  87. // Checks if the given rule has a regex
  88. func ruleContainRegex(rule config.Rule) bool {
  89. if rule.Regex == nil {
  90. return false
  91. }
  92. if rule.Regex.String() == "" {
  93. return false
  94. }
  95. return true
  96. }
  97. // Checks if the given rule has a file name regex
  98. func ruleContainFileNameRegex(rule config.Rule) bool {
  99. if rule.FileNameRegex == nil {
  100. return false
  101. }
  102. if rule.FileNameRegex.String() == "" {
  103. return false
  104. }
  105. return true
  106. }
  107. // Checks if the given rule has a file path regex
  108. func ruleContainFilePathRegex(rule config.Rule) bool {
  109. if rule.FilePathRegex == nil {
  110. return false
  111. }
  112. if rule.FilePathRegex.String() == "" {
  113. return false
  114. }
  115. return true
  116. }
  117. func sendLeak(offender string, line string, filename string, rule config.Rule, c *object.Commit, repo *Repo) {
  118. if repo.Manager.Opts.Redact {
  119. line = strings.ReplaceAll(line, offender, "REDACTED")
  120. offender = "REDACTED"
  121. }
  122. repo.Manager.SendLeaks(manager.Leak{
  123. Line: line,
  124. Offender: offender,
  125. Commit: c.Hash.String(),
  126. Repo: repo.Name,
  127. Message: c.Message,
  128. Rule: rule.Description,
  129. Author: c.Author.Name,
  130. Email: c.Author.Email,
  131. Date: c.Author.When,
  132. Tags: strings.Join(rule.Tags, ", "),
  133. File: filename,
  134. })
  135. }
  136. // InspectFile accepts a file content, fullpath of file, commit and repo. If the file is
  137. // binary OR if a file is matched on whitelisted files set in the configuration, then gitleaks
  138. // will skip auditing that file. It will check first if rules apply to this file comparing filename
  139. // and path to their respective rule regexes and inspect file content with inspectFileContents after.
  140. func InspectFile(content string, fullpath string, c *object.Commit, repo *Repo) {
  141. filename := getFileName(fullpath)
  142. path := getFilePath(fullpath)
  143. // We want to check if there is a whitelist for this file
  144. if len(repo.config.Whitelist.Files) != 0 {
  145. for _, reFileName := range repo.config.Whitelist.Files {
  146. if fileMatched(filename, reFileName) {
  147. log.Debugf("whitelisted file found, skipping audit of file: %s", filename)
  148. return
  149. }
  150. }
  151. }
  152. // We want to check if there is a whitelist for this path
  153. if len(repo.config.Whitelist.Paths) != 0 {
  154. for _, reFilePath := range repo.config.Whitelist.Paths {
  155. if fileMatched(path, reFilePath) {
  156. log.Debugf("file in whitelisted path found, skipping audit of file: %s", filename)
  157. return
  158. }
  159. }
  160. }
  161. for _, rule := range repo.config.Rules {
  162. start := time.Now()
  163. // For each rule we want to check filename whitelists
  164. if isFileNameWhiteListed(filename, rule.Whitelist) || isFilePathWhiteListed(path, rule.Whitelist) {
  165. continue
  166. }
  167. // If it has fileNameRegex and it doesnt match we continue to next rule
  168. if ruleContainFileNameRegex(rule) && !fileMatched(filename, rule.FileNameRegex) {
  169. continue
  170. }
  171. // If it has filePathRegex and it doesnt match we continue to next rule
  172. if ruleContainFilePathRegex(rule) && !fileMatched(path, rule.FilePathRegex) {
  173. continue
  174. }
  175. // If it doesnt contain a content regex then it is a filename regex match
  176. if !ruleContainRegex(rule) {
  177. sendLeak("Filename/path offender: "+filename, "N/A", fullpath, rule, c, repo)
  178. } else {
  179. //otherwise we check if it matches content regex
  180. inspectFileContents(content, fullpath, rule, c, repo)
  181. }
  182. // TODO should return filenameRegex if only file rule
  183. repo.Manager.RecordTime(manager.RegexTime{
  184. Time: howLong(start),
  185. Regex: rule.Regex.String(),
  186. })
  187. }
  188. }
  189. // InspectString accepts a string, commit object, repo, and filename. This function iterates over
  190. // all the rules set by the gitleaks config. If the rule contains entropy checks then entropy will be checked first.
  191. // Next, if the rule contains a regular expression then that will be checked.
  192. func inspectFileContents(content string, path string, rule config.Rule, c *object.Commit, repo *Repo) {
  193. locs := rule.Regex.FindAllIndex([]byte(content), -1)
  194. if len(locs) != 0 {
  195. for _, loc := range locs {
  196. start := loc[0]
  197. end := loc[1]
  198. for start != 0 && content[start] != '\n' {
  199. start = start - 1
  200. }
  201. if start != 0 {
  202. // skip newline
  203. start = start + 1
  204. }
  205. for end < len(content)-1 && content[end] != '\n' {
  206. end = end + 1
  207. }
  208. line := content[start:end]
  209. offender := content[loc[0]:loc[1]]
  210. groups := rule.Regex.FindStringSubmatch(offender)
  211. if isOffenderWhiteListed(offender, rule.Whitelist) {
  212. continue
  213. }
  214. if len(rule.Entropies) != 0 && !trippedEntropy(groups, rule) {
  215. continue
  216. }
  217. sendLeak(offender, line, path, rule, c, repo)
  218. }
  219. }
  220. }
  221. type commitInspector func(c *object.Commit, repo *Repo) error
  222. // inspectCommit accepts a commit hash, repo, and commit inspecting function. A new commit
  223. // object will be created from the hash which will be passed into either inspectCommitPatches
  224. // or inspectFilesAtCommit depending on the options set.
  225. func inspectCommit(hash string, repo *Repo, f commitInspector) error {
  226. repo.Manager.IncrementCommits(1)
  227. h := plumbing.NewHash(hash)
  228. c, err := repo.CommitObject(h)
  229. if err != nil {
  230. return err
  231. }
  232. return f(c, repo)
  233. }
  234. // inspectCommitPatches accepts a commit object and a repo. This function is only called when the --commit=
  235. // option has been set. That option tells gitleaks to look only at a single commit and check the contents
  236. // of said commit. Similar to inspectPatch(), if the files contained in the commit are a binaries or if they are
  237. // whitelisted then those files will be skipped.
  238. func inspectCommitPatches(c *object.Commit, repo *Repo) error {
  239. if len(c.ParentHashes) == 0 {
  240. err := inspectFilesAtCommit(c, repo)
  241. if err != nil {
  242. return err
  243. }
  244. }
  245. return c.Parents().ForEach(func(parent *object.Commit) error {
  246. defer func() {
  247. if err := recover(); err != nil {
  248. // sometimes the patch generation will fail due to a known bug in
  249. // sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
  250. // Once a fix has been merged I will remove this recover.
  251. return
  252. }
  253. }()
  254. if repo.timeoutReached() {
  255. return nil
  256. }
  257. start := time.Now()
  258. patch, err := c.Patch(parent)
  259. if err != nil {
  260. return fmt.Errorf("could not generate patch")
  261. }
  262. repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
  263. inspectPatch(patch, c, repo)
  264. return nil
  265. })
  266. }
  267. // inspectFilesAtCommit accepts a commit object and a repo. This function is only called when the --files-at-commit=
  268. // option has been set. That option tells gitleaks to look only at ALL the files at a commit and check the contents
  269. // of said commit. Similar to inspectPatch(), if the files contained in the commit are a binaries or if they are
  270. // whitelisted then those files will be skipped.
  271. func inspectFilesAtCommit(c *object.Commit, repo *Repo) error {
  272. fIter, err := c.Files()
  273. if err != nil {
  274. return err
  275. }
  276. err = fIter.ForEach(func(f *object.File) error {
  277. bin, err := f.IsBinary()
  278. if bin || repo.timeoutReached() {
  279. return nil
  280. } else if err != nil {
  281. return err
  282. }
  283. content, err := f.Contents()
  284. if err != nil {
  285. return err
  286. }
  287. InspectFile(content, f.Name, c, repo)
  288. return nil
  289. })
  290. return err
  291. }
  292. // howManyThreads will return a number 1-GOMAXPROCS which is the number
  293. // of goroutines that will spawn during gitleaks execution
  294. func howManyThreads(threads int) int {
  295. maxThreads := runtime.GOMAXPROCS(0)
  296. if threads == 0 {
  297. return 1
  298. } else if threads > maxThreads {
  299. log.Warnf("%d threads set too high, setting to system max, %d", threads, maxThreads)
  300. return maxThreads
  301. }
  302. return threads
  303. }
  // isCommitWhiteListed reports whether commitHash is exactly equal to one of the
  // hashes in whitelistedCommits.
  func isCommitWhiteListed(commitHash string, whitelistedCommits []string) bool {
  	for i := 0; i < len(whitelistedCommits); i++ {
  		if whitelistedCommits[i] == commitHash {
  			return true
  		}
  	}
  	return false
  }
  312. func isOffenderWhiteListed(offender string, whitelist []config.Whitelist) bool {
  313. if len(whitelist) != 0 {
  314. for _, wl := range whitelist {
  315. if wl.Regex.FindString(offender) != "" {
  316. return true
  317. }
  318. }
  319. }
  320. return false
  321. }
  322. func isFileNameWhiteListed(filename string, whitelist []config.Whitelist) bool {
  323. if len(whitelist) != 0 {
  324. for _, wl := range whitelist {
  325. if fileMatched(filename, wl.File) {
  326. return true
  327. }
  328. }
  329. }
  330. return false
  331. }
  332. func isFilePathWhiteListed(filepath string, whitelist []config.Whitelist) bool {
  333. if len(whitelist) != 0 {
  334. for _, wl := range whitelist {
  335. if fileMatched(filepath, wl.Path) {
  336. return true
  337. }
  338. }
  339. }
  340. return false
  341. }
  342. func fileMatched(f interface{}, re *regexp.Regexp) bool {
  343. if re == nil {
  344. return false
  345. }
  346. switch f.(type) {
  347. case nil:
  348. return false
  349. case string:
  350. if re.FindString(f.(string)) != "" {
  351. return true
  352. }
  353. return false
  354. case *object.File:
  355. if re.FindString(f.(*object.File).Name) != "" {
  356. return true
  357. }
  358. return false
  359. }
  360. return false
  361. }
  362. // getLogOptions determines what log options are used when iterating through commits.
  363. // It is similar to `git log {branch}`. Default behavior is to log ALL branches so
  364. // gitleaks gets the full git history.
  365. func getLogOptions(repo *Repo) (*git.LogOptions, error) {
  366. var logOpts git.LogOptions
  367. if repo.Manager.Opts.CommitFrom != "" {
  368. logOpts.From = plumbing.NewHash(repo.Manager.Opts.CommitFrom)
  369. }
  370. if repo.Manager.Opts.Branch != "" {
  371. refs, err := repo.Storer.IterReferences()
  372. if err != nil {
  373. return nil, err
  374. }
  375. err = refs.ForEach(func(ref *plumbing.Reference) error {
  376. if ref.Name().IsTag() {
  377. return nil
  378. }
  379. // check heads first
  380. if ref.Name().String() == "refs/heads/"+repo.Manager.Opts.Branch {
  381. logOpts = git.LogOptions{
  382. From: ref.Hash(),
  383. }
  384. return nil
  385. } else if ref.Name().String() == "refs/remotes/origin/"+repo.Manager.Opts.Branch {
  386. logOpts = git.LogOptions{
  387. From: ref.Hash(),
  388. }
  389. return nil
  390. }
  391. return nil
  392. })
  393. if logOpts.From.IsZero() {
  394. return nil, fmt.Errorf("could not find branch %s", repo.Manager.Opts.Branch)
  395. }
  396. return &logOpts, nil
  397. }
  398. if !logOpts.From.IsZero() {
  399. return &logOpts, nil
  400. }
  401. return &git.LogOptions{All: true}, nil
  402. }
  // howLong accepts a time.Time and returns the time elapsed since then,
  // expressed in nanoseconds.
  func howLong(t time.Time) int64 {
  	elapsed := time.Since(t)
  	return elapsed.Nanoseconds()
  }
  407. }