util.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. package audit
  2. import (
  3. "fmt"
  4. "math"
  5. "regexp"
  6. "runtime"
  7. "strings"
  8. "time"
  9. "github.com/zricethezav/gitleaks/config"
  10. "github.com/zricethezav/gitleaks/manager"
  11. log "github.com/sirupsen/logrus"
  12. "gopkg.in/src-d/go-git.v4"
  13. "gopkg.in/src-d/go-git.v4/plumbing"
  14. fdiff "gopkg.in/src-d/go-git.v4/plumbing/format/diff"
  15. "gopkg.in/src-d/go-git.v4/plumbing/object"
  16. )
  17. // Inspect patch accepts a patch, commit, and repo. If the patches contains files that are
  18. // binary, then gitleaks will skip auditing that file OR if a file is matched on
  19. // whitelisted files set in the configuration. If a global rule for files is defined and a filename
  20. // matches said global rule, then a leak is sent to the manager.
  21. // After that, file chunks are created which are then inspected by InspectString()
  22. func inspectPatch(patch *object.Patch, c *object.Commit, repo *Repo) {
  23. for _, f := range patch.FilePatches() {
  24. if f.IsBinary() {
  25. continue
  26. }
  27. if fileMatched(getFileName(f), repo.config.Whitelist.File) {
  28. log.Debugf("whitelisted file found, skipping audit of file: %s", getFileName(f))
  29. continue
  30. }
  31. if fileMatched(getFileName(f), repo.config.FileRegex) {
  32. repo.Manager.SendLeaks(manager.Leak{
  33. Line: "N/A",
  34. Offender: getFileName(f),
  35. Commit: c.Hash.String(),
  36. Repo: repo.Name,
  37. Rule: "file regex matched" + repo.config.FileRegex.String(),
  38. Author: c.Author.Name,
  39. Email: c.Author.Email,
  40. Date: c.Author.When,
  41. File: getFileName(f),
  42. })
  43. }
  44. for _, chunk := range f.Chunks() {
  45. if chunk.Type() == fdiff.Delete || chunk.Type() == fdiff.Add {
  46. InspectString(chunk.Content(), c, repo, getFileName(f))
  47. }
  48. }
  49. }
  50. }
  51. // getFileName accepts a file patch and returns the filename
  52. func getFileName(f fdiff.FilePatch) string {
  53. fn := "???"
  54. from, to := f.Files()
  55. if from != nil {
  56. return from.Path()
  57. } else if to != nil {
  58. return to.Path()
  59. }
  60. return fn
  61. }
  62. // getShannonEntropy https://en.wiktionary.org/wiki/Shannon_entropy
  63. func shannonEntropy(data string) (entropy float64) {
  64. if data == "" {
  65. return 0
  66. }
  67. charCounts := make(map[rune]int)
  68. for _, char := range data {
  69. charCounts[char]++
  70. }
  71. invLength := 1.0 / float64(len(data))
  72. for _, count := range charCounts {
  73. freq := float64(count) * invLength
  74. entropy -= freq * math.Log2(freq)
  75. }
  76. return entropy
  77. }
  78. // aws_access_key_id='AKIAIO5FODNN7EXAMPLE',
  79. // trippedEntropy checks if a given line falls in between entropy ranges supplied
  80. // by a custom gitleaks configuration. Gitleaks do not check entropy by default.
  81. func trippedEntropy(line string, rule config.Rule) bool {
  82. for _, e := range rule.Entropy {
  83. entropy := shannonEntropy(line)
  84. if entropy > e.P1 && entropy < e.P2 {
  85. return true
  86. }
  87. }
  88. return false
  89. }
  90. func ruleContainRegex(rule config.Rule) bool {
  91. if rule.Regex == nil {
  92. return false
  93. }
  94. if rule.Regex.String() == "" {
  95. return false
  96. }
  97. return true
  98. }
  99. // InspectString accepts a string, commit object, repo, and filename. This function iterates over
  100. // all the rules set by the gitleaks config. If the rule contains entropy checks then entropy will be checked first.
  101. // Next, if the rule contains a regular expression then that will be checked.
  102. func InspectString(content string, c *object.Commit, repo *Repo, filename string) {
  103. for _, rule := range repo.config.Rules {
  104. // check entropy
  105. if len(rule.Entropy) != 0 {
  106. // an optimization would be to switch the regex from FindAllIndex to FindString
  107. // since we are iterating on the lines if entropy rules exist...
  108. for _, line := range strings.Split(content, "\n") {
  109. entropyTripped := trippedEntropy(line, rule)
  110. if entropyTripped && !ruleContainRegex(rule) {
  111. repo.Manager.SendLeaks(manager.Leak{
  112. Line: line,
  113. Offender: fmt.Sprintf("Entropy range %+v", rule.Entropy),
  114. Commit: c.Hash.String(),
  115. Repo: repo.Name,
  116. Message: c.Message,
  117. Rule: rule.Description,
  118. Author: c.Author.Name,
  119. Email: c.Author.Email,
  120. Date: c.Author.When,
  121. Tags: strings.Join(rule.Tags, ", "),
  122. File: filename,
  123. })
  124. } else if entropyTripped {
  125. // entropy has been tripped which means if there is a regex specified in the same
  126. // rule, we need to inspect the line for a regex match. In otherwords, the current rule has
  127. // both entropy and regex set which work in combination. This helps narrow down false positives
  128. // on searches for generic passwords in code.
  129. match := rule.Regex.FindString(line)
  130. // check if any rules are whitelisting this leak
  131. if len(rule.Whitelist) != 0 {
  132. for _, wl := range rule.Whitelist {
  133. if fileMatched(filename, wl.File) {
  134. // if matched, go to next rule
  135. goto NEXTLINE
  136. }
  137. if wl.Regex.FindString(line) != "" {
  138. goto NEXTLINE
  139. }
  140. }
  141. }
  142. if match != "" {
  143. // both the regex and entropy in this rule have been tripped which means this line
  144. // contains a leak
  145. repo.Manager.SendLeaks(manager.Leak{
  146. Line: line,
  147. Offender: match,
  148. Commit: c.Hash.String(),
  149. Message: c.Message,
  150. Repo: repo.Name,
  151. Rule: rule.Description,
  152. Author: c.Author.Name,
  153. Email: c.Author.Email,
  154. Date: c.Author.When,
  155. Tags: strings.Join(rule.Tags, ", "),
  156. File: filename,
  157. })
  158. }
  159. }
  160. NEXTLINE:
  161. }
  162. return
  163. }
  164. if rule.Regex.String() == "" {
  165. continue
  166. }
  167. start := time.Now()
  168. locs := rule.Regex.FindAllIndex([]byte(content), -1)
  169. if len(locs) != 0 {
  170. // check if any rules are whitelisting this leak
  171. if len(rule.Whitelist) != 0 {
  172. for _, wl := range rule.Whitelist {
  173. if fileMatched(filename, wl.File) {
  174. // if matched, go to next rule
  175. goto NEXT
  176. }
  177. }
  178. }
  179. for _, loc := range locs {
  180. start := loc[0]
  181. end := loc[1]
  182. for start != 0 && content[start] != '\n' {
  183. start = start - 1
  184. }
  185. if start != 0 {
  186. // skip newline
  187. start = start + 1
  188. }
  189. for end < len(content)-1 && content[end] != '\n' {
  190. end = end + 1
  191. }
  192. offender := content[loc[0]:loc[1]]
  193. line := content[start:end]
  194. if len(rule.Whitelist) != 0 {
  195. for _, wl := range rule.Whitelist {
  196. if wl.Regex.FindString(line) != "" {
  197. goto NEXT
  198. }
  199. }
  200. }
  201. if repo.Manager.Opts.Redact {
  202. line = strings.ReplaceAll(line, offender, "REDACTED")
  203. offender = "REDACTED"
  204. }
  205. repo.Manager.SendLeaks(manager.Leak{
  206. Line: line,
  207. Offender: offender,
  208. Commit: c.Hash.String(),
  209. Message: c.Message,
  210. Repo: repo.Name,
  211. Rule: rule.Description,
  212. Author: c.Author.Name,
  213. Email: c.Author.Email,
  214. Date: c.Author.When,
  215. Tags: strings.Join(rule.Tags, ", "),
  216. File: filename,
  217. })
  218. }
  219. }
  220. repo.Manager.RecordTime(manager.RegexTime{
  221. Time: time.Now().Sub(start).Nanoseconds(),
  222. Regex: rule.Regex.String(),
  223. })
  224. NEXT:
  225. }
  226. }
  227. // inspectCommit accepts a commit object and a repo. This function is only called when the --commit=
  228. // option has been set. That option tells gitleaks to look only at a single commit and check the contents
  229. // of said commit. Similar to inspectPatch(), if the files contained in the commit are a binaries or if they are
  230. // whitelisted then those files will be skipped.
  231. func inspectCommit(c *object.Commit, repo *Repo) error {
  232. fIter, err := c.Files()
  233. if err != nil {
  234. return err
  235. }
  236. err = fIter.ForEach(func(f *object.File) error {
  237. bin, err := f.IsBinary()
  238. if bin {
  239. return nil
  240. } else if err != nil {
  241. return err
  242. }
  243. if fileMatched(f, repo.config.Whitelist.File) {
  244. log.Debugf("whitelisted file found, skipping audit of file: %s", f.Name)
  245. return nil
  246. }
  247. if fileMatched(f.Name, repo.config.FileRegex) {
  248. repo.Manager.SendLeaks(manager.Leak{
  249. Line: "N/A",
  250. Offender: f.Name,
  251. Commit: c.Hash.String(),
  252. Repo: repo.Name,
  253. Rule: "file regex matched" + repo.config.FileRegex.String(),
  254. Author: c.Author.Name,
  255. Email: c.Author.Email,
  256. Date: c.Author.When,
  257. File: f.Name,
  258. })
  259. }
  260. content, err := f.Contents()
  261. if err != nil {
  262. return err
  263. }
  264. InspectString(content, c, repo, f.Name)
  265. return nil
  266. })
  267. return err
  268. }
  269. // howManyThreads will return a number 1-GOMAXPROCS which is the number
  270. // of goroutines that will spawn during gitleaks execution
  271. func howManyThreads(threads int) int {
  272. maxThreads := runtime.GOMAXPROCS(0)
  273. if threads == 0 {
  274. return 1
  275. } else if threads > maxThreads {
  276. log.Warnf("%d threads set too high, setting to system max, %d", threads, maxThreads)
  277. return maxThreads
  278. }
  279. return threads
  280. }
  281. func isCommitWhiteListed(commitHash string, whitelistedCommits []string) bool {
  282. for _, hash := range whitelistedCommits {
  283. if commitHash == hash {
  284. return true
  285. }
  286. }
  287. return false
  288. }
  289. func fileMatched(f interface{}, re *regexp.Regexp) bool {
  290. if re == nil {
  291. return false
  292. }
  293. switch f.(type) {
  294. case nil:
  295. return false
  296. case string:
  297. if re.FindString(f.(string)) != "" {
  298. return true
  299. }
  300. return false
  301. case *object.File:
  302. if re.FindString(f.(*object.File).Name) != "" {
  303. return true
  304. }
  305. return false
  306. }
  307. return false
  308. }
  309. // getLogOptions determines what log options are used when iterating through commits.
  310. // It is similar to `git log {branch}`. Default behavior is to log ALL branches so
  311. // gitleaks gets the full git history.
  312. func getLogOptions(repo *Repo) (*git.LogOptions, error) {
  313. var logOpts git.LogOptions
  314. if repo.Manager.Opts.CommitFrom != "" {
  315. logOpts.From = plumbing.NewHash(repo.Manager.Opts.CommitFrom)
  316. }
  317. if repo.Manager.Opts.Branch != "" {
  318. refs, err := repo.Storer.IterReferences()
  319. if err != nil {
  320. return nil, err
  321. }
  322. err = refs.ForEach(func(ref *plumbing.Reference) error {
  323. if ref.Name().IsTag() {
  324. return nil
  325. }
  326. // check heads first
  327. if ref.Name().String() == "refs/heads/"+repo.Manager.Opts.Branch {
  328. logOpts = git.LogOptions{
  329. From: ref.Hash(),
  330. }
  331. return nil
  332. } else if ref.Name().String() == "refs/remotes/origin/"+repo.Manager.Opts.Branch {
  333. logOpts = git.LogOptions{
  334. From: ref.Hash(),
  335. }
  336. return nil
  337. }
  338. return nil
  339. })
  340. if logOpts.From.IsZero() {
  341. return nil, fmt.Errorf("could not find branch %s", repo.Manager.Opts.Branch)
  342. }
  343. return &logOpts, nil
  344. }
  345. if !logOpts.From.IsZero() {
  346. return &logOpts, nil
  347. }
  348. return &git.LogOptions{All: true}, nil
  349. }
  350. // howLong accepts a time.Time object which is subtracted from time.Now() and
  351. // converted to nanoseconds which is returned
  352. func howLong(t time.Time) int64 {
  353. return time.Now().Sub(t).Nanoseconds()
  354. }