util.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. package audit
  2. import (
  3. "fmt"
  4. "math"
  5. "regexp"
  6. "runtime"
  7. "strings"
  8. "time"
  9. "github.com/zricethezav/gitleaks/v3/config"
  10. "github.com/zricethezav/gitleaks/v3/manager"
  11. log "github.com/sirupsen/logrus"
  12. "gopkg.in/src-d/go-git.v4"
  13. "gopkg.in/src-d/go-git.v4/plumbing"
  14. fdiff "gopkg.in/src-d/go-git.v4/plumbing/format/diff"
  15. "gopkg.in/src-d/go-git.v4/plumbing/object"
  16. )
  17. // Inspect patch accepts a patch, commit, and repo. If the patches contains files that are
  18. // binary, then gitleaks will skip auditing that file OR if a file is matched on
  19. // whitelisted files set in the configuration. If a global rule for files is defined and a filename
  20. // matches said global rule, then a leak is sent to the manager.
  21. // After that, file chunks are created which are then inspected by InspectString()
  22. func inspectPatch(patch *object.Patch, c *object.Commit, repo *Repo) {
  23. for _, f := range patch.FilePatches() {
  24. if repo.timeoutReached() {
  25. return
  26. }
  27. if f.IsBinary() {
  28. continue
  29. }
  30. if fileMatched(getFileName(f), repo.config.Whitelist.File) {
  31. log.Debugf("whitelisted file found, skipping audit of file: %s", getFileName(f))
  32. continue
  33. }
  34. if fileMatched(getFileName(f), repo.config.FileRegex) {
  35. repo.Manager.SendLeaks(manager.Leak{
  36. Line: "N/A",
  37. Offender: getFileName(f),
  38. Commit: c.Hash.String(),
  39. Repo: repo.Name,
  40. Rule: "file regex matched" + repo.config.FileRegex.String(),
  41. Author: c.Author.Name,
  42. Email: c.Author.Email,
  43. Date: c.Author.When,
  44. File: getFileName(f),
  45. })
  46. }
  47. for _, chunk := range f.Chunks() {
  48. if chunk.Type() == fdiff.Delete || chunk.Type() == fdiff.Add {
  49. InspectString(chunk.Content(), c, repo, getFileName(f))
  50. }
  51. }
  52. }
  53. }
  54. // getFileName accepts a file patch and returns the filename
  55. func getFileName(f fdiff.FilePatch) string {
  56. fn := "???"
  57. from, to := f.Files()
  58. if from != nil {
  59. return from.Path()
  60. } else if to != nil {
  61. return to.Path()
  62. }
  63. return fn
  64. }
  65. // getShannonEntropy https://en.wiktionary.org/wiki/Shannon_entropy
  66. func shannonEntropy(data string) (entropy float64) {
  67. if data == "" {
  68. return 0
  69. }
  70. charCounts := make(map[rune]int)
  71. for _, char := range data {
  72. charCounts[char]++
  73. }
  74. invLength := 1.0 / float64(len(data))
  75. for _, count := range charCounts {
  76. freq := float64(count) * invLength
  77. entropy -= freq * math.Log2(freq)
  78. }
  79. return entropy
  80. }
  81. // aws_access_key_id='AKIAIO5FODNN7EXAMPLE',
  82. // trippedEntropy checks if a given groups or offender falls in between entropy ranges
  83. // supplied by a custom gitleaks configuration. Gitleaks do not check entropy by default.
  84. func trippedEntropy(groups []string, rule config.Rule) bool {
  85. for _, e := range rule.Entropies {
  86. if len(groups) > e.Group {
  87. entropy := shannonEntropy(groups[e.Group])
  88. if entropy >= e.Min && entropy <= e.Max {
  89. return true
  90. }
  91. }
  92. }
  93. return false
  94. }
  95. func ruleContainRegex(rule config.Rule) bool {
  96. if rule.Regex == nil {
  97. return false
  98. }
  99. if rule.Regex.String() == "" {
  100. return false
  101. }
  102. return true
  103. }
  104. // InspectString accepts a string, commit object, repo, and filename. This function iterates over
  105. // all the rules set by the gitleaks config. If the rule contains entropy checks then entropy will be checked first.
  106. // Next, if the rule contains a regular expression then that will be checked.
  107. func InspectString(content string, c *object.Commit, repo *Repo, filename string) {
  108. for _, rule := range repo.config.Rules {
  109. if rule.Regex.String() == "" {
  110. continue
  111. }
  112. if repo.timeoutReached() {
  113. return
  114. }
  115. start := time.Now()
  116. locs := rule.Regex.FindAllIndex([]byte(content), -1)
  117. if len(locs) != 0 {
  118. // check if any rules are whitelisting this leak
  119. if len(rule.Whitelist) != 0 {
  120. for _, wl := range rule.Whitelist {
  121. if fileMatched(filename, wl.File) {
  122. // if matched, go to next rule
  123. goto NEXT
  124. }
  125. }
  126. }
  127. for _, loc := range locs {
  128. start := loc[0]
  129. end := loc[1]
  130. for start != 0 && content[start] != '\n' {
  131. start = start - 1
  132. }
  133. if start != 0 {
  134. // skip newline
  135. start = start + 1
  136. }
  137. for end < len(content)-1 && content[end] != '\n' {
  138. end = end + 1
  139. }
  140. line := content[start:end]
  141. offender := content[loc[0]:loc[1]]
  142. groups := rule.Regex.FindStringSubmatch(offender)
  143. if len(rule.Whitelist) != 0 {
  144. for _, wl := range rule.Whitelist {
  145. if wl.Regex.FindString(offender) != "" {
  146. goto NEXT
  147. }
  148. }
  149. }
  150. if len(rule.Entropies) != 0 {
  151. if trippedEntropy(groups, rule) {
  152. if repo.Manager.Opts.Redact {
  153. line = strings.ReplaceAll(line, offender, "REDACTED")
  154. offender = "REDACTED"
  155. }
  156. repo.Manager.SendLeaks(manager.Leak{
  157. Line: line,
  158. Offender: offender,
  159. Commit: c.Hash.String(),
  160. Repo: repo.Name,
  161. Message: c.Message,
  162. Rule: rule.Description,
  163. Author: c.Author.Name,
  164. Email: c.Author.Email,
  165. Date: c.Author.When,
  166. Tags: strings.Join(rule.Tags, ", "),
  167. File: filename,
  168. })
  169. }
  170. } else {
  171. if repo.Manager.Opts.Redact {
  172. line = strings.ReplaceAll(line, offender, "REDACTED")
  173. offender = "REDACTED"
  174. }
  175. repo.Manager.SendLeaks(manager.Leak{
  176. Line: line,
  177. Offender: offender,
  178. Commit: c.Hash.String(),
  179. Message: c.Message,
  180. Repo: repo.Name,
  181. Rule: rule.Description,
  182. Author: c.Author.Name,
  183. Email: c.Author.Email,
  184. Date: c.Author.When,
  185. Tags: strings.Join(rule.Tags, ", "),
  186. File: filename,
  187. })
  188. }
  189. }
  190. }
  191. repo.Manager.RecordTime(manager.RegexTime{
  192. Time: time.Now().Sub(start).Nanoseconds(),
  193. Regex: rule.Regex.String(),
  194. })
  195. NEXT:
  196. }
  197. }
  198. type commitInspector func(c *object.Commit, repo *Repo) error
  199. // inspectCommit accepts a commit hash, repo, and commit inspecting function. A new commit
  200. // object will be created from the hash which will be passed into either inspectCommitPatches
  201. // or inspectFilesAtCommit depending on the options set.
  202. func inspectCommit(hash string, repo *Repo, f commitInspector) error {
  203. repo.Manager.IncrementCommits(1)
  204. h := plumbing.NewHash(hash)
  205. c, err := repo.CommitObject(h)
  206. if err != nil {
  207. return err
  208. }
  209. return f(c, repo)
  210. }
  211. // inspectCommitPatches accepts a commit object and a repo. This function is only called when the --commit=
  212. // option has been set. That option tells gitleaks to look only at a single commit and check the contents
  213. // of said commit. Similar to inspectPatch(), if the files contained in the commit are a binaries or if they are
  214. // whitelisted then those files will be skipped.
  215. func inspectCommitPatches(c *object.Commit, repo *Repo) error {
  216. if len(c.ParentHashes) == 0 {
  217. err := inspectFilesAtCommit(c, repo)
  218. if err != nil {
  219. return err
  220. }
  221. }
  222. return c.Parents().ForEach(func(parent *object.Commit) error {
  223. defer func() {
  224. if err := recover(); err != nil {
  225. // sometimes the patch generation will fail due to a known bug in
  226. // sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
  227. // Once a fix has been merged I will remove this recover.
  228. return
  229. }
  230. }()
  231. if repo.timeoutReached() {
  232. return nil
  233. }
  234. start := time.Now()
  235. patch, err := c.Patch(parent)
  236. if err != nil {
  237. return fmt.Errorf("could not generate patch")
  238. }
  239. repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
  240. inspectPatch(patch, c, repo)
  241. return nil
  242. })
  243. }
  244. // inspectFilesAtCommit accepts a commit object and a repo. This function is only called when the --files-at-commit=
  245. // option has been set. That option tells gitleaks to look only at ALL the files at a commit and check the contents
  246. // of said commit. Similar to inspectPatch(), if the files contained in the commit are a binaries or if they are
  247. // whitelisted then those files will be skipped.
  248. func inspectFilesAtCommit(c *object.Commit, repo *Repo) error {
  249. fIter, err := c.Files()
  250. if err != nil {
  251. return err
  252. }
  253. err = fIter.ForEach(func(f *object.File) error {
  254. bin, err := f.IsBinary()
  255. if bin || repo.timeoutReached() {
  256. return nil
  257. } else if err != nil {
  258. return err
  259. }
  260. if fileMatched(f, repo.config.Whitelist.File) {
  261. log.Debugf("whitelisted file found, skipping audit of file: %s", f.Name)
  262. return nil
  263. }
  264. if fileMatched(f.Name, repo.config.FileRegex) {
  265. repo.Manager.SendLeaks(manager.Leak{
  266. Line: "N/A",
  267. Offender: f.Name,
  268. Commit: c.Hash.String(),
  269. Repo: repo.Name,
  270. Rule: "file regex matched" + repo.config.FileRegex.String(),
  271. Author: c.Author.Name,
  272. Email: c.Author.Email,
  273. Date: c.Author.When,
  274. File: f.Name,
  275. })
  276. }
  277. content, err := f.Contents()
  278. if err != nil {
  279. return err
  280. }
  281. InspectString(content, c, repo, f.Name)
  282. return nil
  283. })
  284. return err
  285. }
  286. // howManyThreads will return a number 1-GOMAXPROCS which is the number
  287. // of goroutines that will spawn during gitleaks execution
  288. func howManyThreads(threads int) int {
  289. maxThreads := runtime.GOMAXPROCS(0)
  290. if threads == 0 {
  291. return 1
  292. } else if threads > maxThreads {
  293. log.Warnf("%d threads set too high, setting to system max, %d", threads, maxThreads)
  294. return maxThreads
  295. }
  296. return threads
  297. }
  298. func isCommitWhiteListed(commitHash string, whitelistedCommits []string) bool {
  299. for _, hash := range whitelistedCommits {
  300. if commitHash == hash {
  301. return true
  302. }
  303. }
  304. return false
  305. }
  306. func fileMatched(f interface{}, re *regexp.Regexp) bool {
  307. if re == nil {
  308. return false
  309. }
  310. switch f.(type) {
  311. case nil:
  312. return false
  313. case string:
  314. if re.FindString(f.(string)) != "" {
  315. return true
  316. }
  317. return false
  318. case *object.File:
  319. if re.FindString(f.(*object.File).Name) != "" {
  320. return true
  321. }
  322. return false
  323. }
  324. return false
  325. }
  326. // getLogOptions determines what log options are used when iterating through commits.
  327. // It is similar to `git log {branch}`. Default behavior is to log ALL branches so
  328. // gitleaks gets the full git history.
  329. func getLogOptions(repo *Repo) (*git.LogOptions, error) {
  330. var logOpts git.LogOptions
  331. if repo.Manager.Opts.CommitFrom != "" {
  332. logOpts.From = plumbing.NewHash(repo.Manager.Opts.CommitFrom)
  333. }
  334. if repo.Manager.Opts.Branch != "" {
  335. refs, err := repo.Storer.IterReferences()
  336. if err != nil {
  337. return nil, err
  338. }
  339. err = refs.ForEach(func(ref *plumbing.Reference) error {
  340. if ref.Name().IsTag() {
  341. return nil
  342. }
  343. // check heads first
  344. if ref.Name().String() == "refs/heads/"+repo.Manager.Opts.Branch {
  345. logOpts = git.LogOptions{
  346. From: ref.Hash(),
  347. }
  348. return nil
  349. } else if ref.Name().String() == "refs/remotes/origin/"+repo.Manager.Opts.Branch {
  350. logOpts = git.LogOptions{
  351. From: ref.Hash(),
  352. }
  353. return nil
  354. }
  355. return nil
  356. })
  357. if logOpts.From.IsZero() {
  358. return nil, fmt.Errorf("could not find branch %s", repo.Manager.Opts.Branch)
  359. }
  360. return &logOpts, nil
  361. }
  362. if !logOpts.From.IsZero() {
  363. return &logOpts, nil
  364. }
  365. return &git.LogOptions{All: true}, nil
  366. }
  367. // howLong accepts a time.Time object which is subtracted from time.Now() and
  368. // converted to nanoseconds which is returned
  369. func howLong(t time.Time) int64 {
  370. return time.Now().Sub(t).Nanoseconds()
  371. }