4
0

util.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  1. package audit
  2. import (
  3. "fmt"
  4. "math"
  5. "path/filepath"
  6. "regexp"
  7. "runtime"
  8. "strings"
  9. "time"
  10. "github.com/zricethezav/gitleaks/v4/config"
  11. "github.com/zricethezav/gitleaks/v4/manager"
  12. "github.com/go-git/go-git/v5"
  13. "github.com/go-git/go-git/v5/plumbing"
  14. fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
  15. "github.com/go-git/go-git/v5/plumbing/object"
  16. log "github.com/sirupsen/logrus"
  17. )
  18. // Inspect patch accepts a patch, commit, and repo. If the patches contains files that are
  19. // binary, then gitleaks will skip auditing that file OR if a file is matched on
  20. // whitelisted files set in the configuration. If a global rule for files is defined and a filename
  21. // matches said global rule, then a leak is sent to the manager.
  22. // After that, file chunks are created which are then inspected by InspectString()
  23. func inspectPatch(patch *object.Patch, c *object.Commit, repo *Repo) {
  24. for _, f := range patch.FilePatches() {
  25. if repo.timeoutReached() {
  26. return
  27. }
  28. if f.IsBinary() {
  29. continue
  30. }
  31. for _, chunk := range f.Chunks() {
  32. if chunk.Type() == fdiff.Delete || chunk.Type() == fdiff.Add {
  33. InspectFile(chunk.Content(), getFileFullPath(f), c, repo)
  34. }
  35. }
  36. }
  37. }
  38. // getFileName accepts a file patch and returns the filename
  39. func getFileFullPath(f fdiff.FilePatch) string {
  40. fn := "???"
  41. from, to := f.Files()
  42. if from != nil {
  43. return from.Path()
  44. } else if to != nil {
  45. return to.Path()
  46. }
  47. return fn
  48. }
  49. // getFileName accepts a string with full path and returns only path
  50. func getFilePath(fullpath string) string {
  51. return filepath.Dir(fullpath)
  52. }
  53. // getFileName accepts a string with full path and returns only filename
  54. func getFileName(fullpath string) string {
  55. return filepath.Base(fullpath)
  56. }
  57. // aws_access_key_id='AKIAIO5FODNN7EXAMPLE',
  58. // trippedEntropy checks if a given capture group or offender falls in between entropy ranges
  59. // supplied by a custom gitleaks configuration. Gitleaks do not check entropy by default.
  60. func trippedEntropy(groups []string, rule config.Rule) bool {
  61. for _, e := range rule.Entropies {
  62. if len(groups) > e.Group {
  63. entropy := shannonEntropy(groups[e.Group])
  64. if entropy >= e.Min && entropy <= e.Max {
  65. return true
  66. }
  67. }
  68. }
  69. return false
  70. }
  71. // getShannonEntropy https://en.wiktionary.org/wiki/Shannon_entropy
  72. func shannonEntropy(data string) (entropy float64) {
  73. if data == "" {
  74. return 0
  75. }
  76. charCounts := make(map[rune]int)
  77. for _, char := range data {
  78. charCounts[char]++
  79. }
  80. invLength := 1.0 / float64(len(data))
  81. for _, count := range charCounts {
  82. freq := float64(count) * invLength
  83. entropy -= freq * math.Log2(freq)
  84. }
  85. return entropy
  86. }
  87. // Checks if the given rule has a regex
  88. func ruleContainRegex(rule config.Rule) bool {
  89. if rule.Regex == nil {
  90. return false
  91. }
  92. if rule.Regex.String() == "" {
  93. return false
  94. }
  95. return true
  96. }
  97. // Checks if the given rule has a file name regex
  98. func ruleContainFileNameRegex(rule config.Rule) bool {
  99. if rule.FileNameRegex == nil {
  100. return false
  101. }
  102. if rule.FileNameRegex.String() == "" {
  103. return false
  104. }
  105. return true
  106. }
  107. // Checks if the given rule has a file path regex
  108. func ruleContainFilePathRegex(rule config.Rule) bool {
  109. if rule.FilePathRegex == nil {
  110. return false
  111. }
  112. if rule.FilePathRegex.String() == "" {
  113. return false
  114. }
  115. return true
  116. }
  117. func sendLeak(offender string, line string, filename string, rule config.Rule, c *object.Commit, repo *Repo) {
  118. repo.Manager.SendLeaks(manager.Leak{
  119. Line: line,
  120. Offender: offender,
  121. Commit: c.Hash.String(),
  122. Repo: repo.Name,
  123. Message: c.Message,
  124. Rule: rule.Description,
  125. Author: c.Author.Name,
  126. Email: c.Author.Email,
  127. Date: c.Author.When,
  128. Tags: strings.Join(rule.Tags, ", "),
  129. File: filename,
  130. })
  131. }
  132. // InspectFile accepts a file content, fullpath of file, commit and repo. If the file is
  133. // binary OR if a file is matched on whitelisted files set in the configuration, then gitleaks
  134. // will skip auditing that file. It will check first if rules apply to this file comparing filename
  135. // and path to their respective rule regexes and inspect file content with inspectFileContents after.
  136. func InspectFile(content string, fullpath string, c *object.Commit, repo *Repo) {
  137. filename := getFileName(fullpath)
  138. path := getFilePath(fullpath)
  139. // We want to check if there is a whitelist for this file
  140. if len(repo.config.Whitelist.Files) != 0 {
  141. for _, reFileName := range repo.config.Whitelist.Files {
  142. if RegexMatched(filename, reFileName) {
  143. log.Debugf("whitelisted file found, skipping audit of file: %s", filename)
  144. return
  145. }
  146. }
  147. }
  148. // We want to check if there is a whitelist for this path
  149. if len(repo.config.Whitelist.Paths) != 0 {
  150. for _, reFilePath := range repo.config.Whitelist.Paths {
  151. if RegexMatched(path, reFilePath) {
  152. log.Debugf("file in whitelisted path found, skipping audit of file: %s", filename)
  153. return
  154. }
  155. }
  156. }
  157. for _, rule := range repo.config.Rules {
  158. start := time.Now()
  159. // For each rule we want to check filename whitelists
  160. if isFileNameWhiteListed(filename, rule.Whitelist) || isFilePathWhiteListed(path, rule.Whitelist) {
  161. continue
  162. }
  163. // If it has fileNameRegex and it doesnt match we continue to next rule
  164. if ruleContainFileNameRegex(rule) && !RegexMatched(filename, rule.FileNameRegex) {
  165. continue
  166. }
  167. // If it has filePathRegex and it doesnt match we continue to next rule
  168. if ruleContainFilePathRegex(rule) && !RegexMatched(path, rule.FilePathRegex) {
  169. continue
  170. }
  171. // If it doesnt contain a content regex then it is a filename regex match
  172. if !ruleContainRegex(rule) {
  173. sendLeak("Filename/path offender: "+filename, "N/A", fullpath, rule, c, repo)
  174. } else {
  175. //otherwise we check if it matches content regex
  176. inspectFileContents(content, fullpath, rule, c, repo)
  177. }
  178. // TODO should return filenameRegex if only file rule
  179. repo.Manager.RecordTime(manager.RegexTime{
  180. Time: howLong(start),
  181. Regex: rule.Regex.String(),
  182. })
  183. }
  184. }
  185. // InspectString accepts a string, commit object, repo, and filename. This function iterates over
  186. // all the rules set by the gitleaks config. If the rule contains entropy checks then entropy will be checked first.
  187. // Next, if the rule contains a regular expression then that will be checked.
  188. func inspectFileContents(content string, path string, rule config.Rule, c *object.Commit, repo *Repo) {
  189. locs := rule.Regex.FindAllIndex([]byte(content), -1)
  190. if len(locs) != 0 {
  191. for _, loc := range locs {
  192. start := loc[0]
  193. end := loc[1]
  194. for start != 0 && content[start] != '\n' {
  195. start = start - 1
  196. }
  197. if start != 0 {
  198. // skip newline
  199. start = start + 1
  200. }
  201. for end < len(content)-1 && content[end] != '\n' {
  202. end = end + 1
  203. }
  204. line := content[start:end]
  205. offender := content[loc[0]:loc[1]]
  206. groups := rule.Regex.FindStringSubmatch(offender)
  207. if isOffenderWhiteListed(offender, rule.Whitelist) {
  208. continue
  209. }
  210. if len(rule.Entropies) != 0 && !trippedEntropy(groups, rule) {
  211. continue
  212. }
  213. sendLeak(offender, line, path, rule, c, repo)
  214. }
  215. }
  216. }
  217. type commitInspector func(c *object.Commit, repo *Repo) error
  218. // inspectCommit accepts a commit hash, repo, and commit inspecting function. A new commit
  219. // object will be created from the hash which will be passed into either inspectCommitPatches
  220. // or inspectFilesAtCommit depending on the options set.
  221. func inspectCommit(commit string, repo *Repo, f commitInspector) error {
  222. if commit == "latest" {
  223. ref, err := repo.Repository.Head()
  224. if err != nil {
  225. return err
  226. }
  227. commit = ref.Hash().String()
  228. }
  229. repo.Manager.IncrementCommits(1)
  230. h := plumbing.NewHash(commit)
  231. c, err := repo.CommitObject(h)
  232. if err != nil {
  233. return err
  234. }
  235. return f(c, repo)
  236. }
  237. // inspectCommitPatches accepts a commit object and a repo. This function is only called when the --commit=
  238. // option has been set. That option tells gitleaks to look only at a single commit and check the contents
  239. // of said commit. Similar to inspectPatch(), if the files contained in the commit are a binaries or if they are
  240. // whitelisted then those files will be skipped.
  241. func inspectCommitPatches(c *object.Commit, repo *Repo) error {
  242. if len(c.ParentHashes) == 0 {
  243. err := inspectFilesAtCommit(c, repo)
  244. if err != nil {
  245. return err
  246. }
  247. }
  248. return c.Parents().ForEach(func(parent *object.Commit) error {
  249. defer func() {
  250. if err := recover(); err != nil {
  251. // sometimes the patch generation will fail due to a known bug in
  252. // sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
  253. // Once a fix has been merged I will remove this recover.
  254. return
  255. }
  256. }()
  257. if repo.timeoutReached() {
  258. return nil
  259. }
  260. start := time.Now()
  261. patch, err := c.Patch(parent)
  262. if err != nil {
  263. return fmt.Errorf("could not generate patch")
  264. }
  265. repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
  266. inspectPatch(patch, c, repo)
  267. return nil
  268. })
  269. }
  270. // inspectFilesAtCommit accepts a commit object and a repo. This function is only called when the --files-at-commit=
  271. // option has been set. That option tells gitleaks to look only at ALL the files at a commit and check the contents
  272. // of said commit. Similar to inspectPatch(), if the files contained in the commit are a binaries or if they are
  273. // whitelisted then those files will be skipped.
  274. func inspectFilesAtCommit(c *object.Commit, repo *Repo) error {
  275. fIter, err := c.Files()
  276. if err != nil {
  277. return err
  278. }
  279. err = fIter.ForEach(func(f *object.File) error {
  280. bin, err := f.IsBinary()
  281. if bin || repo.timeoutReached() {
  282. return nil
  283. } else if err != nil {
  284. return err
  285. }
  286. content, err := f.Contents()
  287. if err != nil {
  288. return err
  289. }
  290. InspectFile(content, f.Name, c, repo)
  291. return nil
  292. })
  293. return err
  294. }
  295. // howManyThreads will return a number 1-GOMAXPROCS which is the number
  296. // of goroutines that will spawn during gitleaks execution
  297. func howManyThreads(threads int) int {
  298. maxThreads := runtime.GOMAXPROCS(0)
  299. if threads == 0 {
  300. return 1
  301. } else if threads > maxThreads {
  302. log.Warnf("%d threads set too high, setting to system max, %d", threads, maxThreads)
  303. return maxThreads
  304. }
  305. return threads
  306. }
  307. func isCommitWhiteListed(commitHash string, whitelistedCommits []string) bool {
  308. for _, hash := range whitelistedCommits {
  309. if commitHash == hash {
  310. return true
  311. }
  312. }
  313. return false
  314. }
  315. func isOffenderWhiteListed(offender string, whitelist []config.Whitelist) bool {
  316. if len(whitelist) != 0 {
  317. for _, wl := range whitelist {
  318. if wl.Regex.FindString(offender) != "" {
  319. return true
  320. }
  321. }
  322. }
  323. return false
  324. }
  325. func isFileNameWhiteListed(filename string, whitelist []config.Whitelist) bool {
  326. if len(whitelist) != 0 {
  327. for _, wl := range whitelist {
  328. if RegexMatched(filename, wl.File) {
  329. return true
  330. }
  331. }
  332. }
  333. return false
  334. }
  335. func isFilePathWhiteListed(filepath string, whitelist []config.Whitelist) bool {
  336. if len(whitelist) != 0 {
  337. for _, wl := range whitelist {
  338. if RegexMatched(filepath, wl.Path) {
  339. return true
  340. }
  341. }
  342. }
  343. return false
  344. }
  345. // RegexMatched matched an interface to a regular expression. The interface f can
  346. // be a string type or go-git *object.File type.
  347. func RegexMatched(f interface{}, re *regexp.Regexp) bool {
  348. if re == nil {
  349. return false
  350. }
  351. switch f.(type) {
  352. case nil:
  353. return false
  354. case string:
  355. if re.FindString(f.(string)) != "" {
  356. return true
  357. }
  358. return false
  359. case *object.File:
  360. if re.FindString(f.(*object.File).Name) != "" {
  361. return true
  362. }
  363. return false
  364. }
  365. return false
  366. }
  367. // getLogOptions determines what log options are used when iterating through commits.
  368. // It is similar to `git log {branch}`. Default behavior is to log ALL branches so
  369. // gitleaks gets the full git history.
  370. func getLogOptions(repo *Repo) (*git.LogOptions, error) {
  371. var logOpts git.LogOptions
  372. const dateformat string = "2006-01-02"
  373. const timeformat string = "2006-01-02T15:04:05-0700"
  374. if repo.Manager.Opts.CommitFrom != "" {
  375. logOpts.From = plumbing.NewHash(repo.Manager.Opts.CommitFrom)
  376. }
  377. if repo.Manager.Opts.CommitSince != "" {
  378. if t, err := time.Parse(timeformat, repo.Manager.Opts.CommitSince); err == nil {
  379. logOpts.Since = &t
  380. } else if t, err := time.Parse(dateformat, repo.Manager.Opts.CommitSince); err == nil {
  381. logOpts.Since = &t
  382. } else {
  383. return nil, err
  384. }
  385. }
  386. if repo.Manager.Opts.CommitUntil != "" {
  387. if t, err := time.Parse(timeformat, repo.Manager.Opts.CommitUntil); err == nil {
  388. logOpts.Until = &t
  389. } else if t, err := time.Parse(dateformat, repo.Manager.Opts.CommitUntil); err == nil {
  390. logOpts.Until = &t
  391. } else {
  392. return nil, err
  393. }
  394. }
  395. if repo.Manager.Opts.Branch != "" {
  396. refs, err := repo.Storer.IterReferences()
  397. if err != nil {
  398. return nil, err
  399. }
  400. err = refs.ForEach(func(ref *plumbing.Reference) error {
  401. if ref.Name().IsTag() {
  402. return nil
  403. }
  404. // check heads first
  405. if ref.Name().String() == "refs/heads/"+repo.Manager.Opts.Branch {
  406. logOpts = git.LogOptions{
  407. From: ref.Hash(),
  408. }
  409. return nil
  410. } else if ref.Name().String() == "refs/remotes/origin/"+repo.Manager.Opts.Branch {
  411. logOpts = git.LogOptions{
  412. From: ref.Hash(),
  413. }
  414. return nil
  415. }
  416. return nil
  417. })
  418. if logOpts.From.IsZero() {
  419. return nil, fmt.Errorf("could not find branch %s", repo.Manager.Opts.Branch)
  420. }
  421. return &logOpts, nil
  422. }
  423. if !logOpts.From.IsZero() || logOpts.Since != nil || logOpts.Until != nil {
  424. return &logOpts, nil
  425. }
  426. return &git.LogOptions{All: true}, nil
  427. }
  428. // howLong accepts a time.Time object which is subtracted from time.Now() and
  429. // converted to nanoseconds which is returned
  430. func howLong(t time.Time) int64 {
  431. return time.Now().Sub(t).Nanoseconds()
  432. }