util.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. package audit
  2. import (
  3. "fmt"
  4. "math"
  5. "regexp"
  6. "runtime"
  7. "strings"
  8. "time"
  9. "github.com/zricethezav/gitleaks/config"
  10. "github.com/zricethezav/gitleaks/manager"
  11. log "github.com/sirupsen/logrus"
  12. "gopkg.in/src-d/go-git.v4"
  13. "gopkg.in/src-d/go-git.v4/plumbing"
  14. fdiff "gopkg.in/src-d/go-git.v4/plumbing/format/diff"
  15. "gopkg.in/src-d/go-git.v4/plumbing/object"
  16. )
// maxLineLen is a length limit of 200.
// NOTE(review): not referenced anywhere in this file; presumably a cap on the
// length of reported lines — confirm against the rest of the package.
const maxLineLen = 200
  18. // Inspect patch accepts a patch, commit, and repo. If the patches contains files that are
  19. // binary, then gitleaks will skip auditing that file OR if a file is matched on
  20. // whitelisted files set in the configuration. If a global rule for files is defined and a filename
  21. // matches said global rule, then a laek is sent to the manager.
  22. // After that, file chunks are created which are then inspected by InspectString()
  23. func inspectPatch(patch *object.Patch, c *object.Commit, repo *Repo) {
  24. for _, f := range patch.FilePatches() {
  25. if f.IsBinary() {
  26. continue
  27. }
  28. if fileMatched(getFileName(f), repo.config.Whitelist.File) {
  29. log.Debugf("whitelisted file found, skipping audit of file: %s", getFileName(f))
  30. continue
  31. }
  32. if fileMatched(getFileName(f), repo.config.FileRegex) {
  33. repo.Manager.SendLeaks(manager.Leak{
  34. Line: "N/A",
  35. Offender: getFileName(f),
  36. Commit: c.Hash.String(),
  37. Repo: repo.Name,
  38. Rule: "file regex matched" + repo.config.FileRegex.String(),
  39. Author: c.Author.Name,
  40. Email: c.Author.Email,
  41. Date: c.Author.When,
  42. File: getFileName(f),
  43. })
  44. }
  45. for _, chunk := range f.Chunks() {
  46. if chunk.Type() == fdiff.Delete || chunk.Type() == fdiff.Add {
  47. InspectString(chunk.Content(), c, repo, getFileName(f))
  48. }
  49. }
  50. }
  51. }
  52. // getFileName accepts a file patch and returns the filename
  53. func getFileName(f fdiff.FilePatch) string {
  54. fn := "???"
  55. from, to := f.Files()
  56. if from != nil {
  57. return from.Path()
  58. } else if to != nil {
  59. return to.Path()
  60. }
  61. return fn
  62. }
  63. // getShannonEntropy https://en.wiktionary.org/wiki/Shannon_entropy
  64. func shannonEntropy(data string) (entropy float64) {
  65. if data == "" {
  66. return 0
  67. }
  68. charCounts := make(map[rune]int)
  69. for _, char := range data {
  70. charCounts[char]++
  71. }
  72. invLength := 1.0 / float64(len(data))
  73. for _, count := range charCounts {
  74. freq := float64(count) * invLength
  75. entropy -= freq * math.Log2(freq)
  76. }
  77. return entropy
  78. }
  79. // aws_access_key_id='AKIAIO5FODNN7EXAMPLE',
  80. // trippedEntropy checks if a given line falls in between entropy ranges supplied
  81. // by a custom gitleaks configuration. Gitleaks do not check entropy by default.
  82. func trippedEntropy(line string, rule config.Rule) bool {
  83. for _, e := range rule.Entropy {
  84. entropy := shannonEntropy(line)
  85. if entropy > e.P1 && entropy < e.P2 {
  86. return true
  87. }
  88. }
  89. return false
  90. }
  91. func ruleContainRegex(rule config.Rule) bool {
  92. if rule.Regex == nil {
  93. return false
  94. }
  95. if rule.Regex.String() == "" {
  96. return false
  97. }
  98. return true
  99. }
  100. // InspectString accepts a string, commit object, repo, and filename. This function iterates over
  101. // all the rules set by the gitleaks config. If the rule contains entropy checks then entropy will be checked first.
  102. // Next, if the rule contains a regular expression then that will be checked.
  103. func InspectString(content string, c *object.Commit, repo *Repo, filename string) {
  104. for _, rule := range repo.config.Rules {
  105. // check entropy
  106. if len(rule.Entropy) != 0 {
  107. // an optimization would be to switch the regex from FindAllIndex to FindString
  108. // since we are iterating on the lines if entropy rules exist...
  109. for _, line := range strings.Split(content, "\n") {
  110. entropyTripped := trippedEntropy(line, rule)
  111. if entropyTripped && !ruleContainRegex(rule) {
  112. repo.Manager.SendLeaks(manager.Leak{
  113. Line: line,
  114. Offender: fmt.Sprintf("Entropy range %+v", rule.Entropy),
  115. Commit: c.Hash.String(),
  116. Repo: repo.Name,
  117. Message: c.Message,
  118. Rule: rule.Description,
  119. Author: c.Author.Name,
  120. Email: c.Author.Email,
  121. Date: c.Author.When,
  122. Tags: strings.Join(rule.Tags, ", "),
  123. File: filename,
  124. })
  125. } else if entropyTripped {
  126. // entropy has been tripped which means if there is a regex specified in the same
  127. // rule, we need to inspect the line for a regex match. In otherwords, the current rule has
  128. // both entropy and regex set which work in combination. This helps narrow down false positives
  129. // on searches for generic passwords in code.
  130. match := rule.Regex.FindString(line)
  131. // check if any rules are whitelisting this leak
  132. if len(rule.Whitelist) != 0 {
  133. for _, wl := range rule.Whitelist {
  134. if fileMatched(filename, wl.File) {
  135. // if matched, go to next rule
  136. goto NEXTLINE
  137. }
  138. if wl.Regex.FindString(line) != "" {
  139. goto NEXTLINE
  140. }
  141. }
  142. }
  143. if match != "" {
  144. // both the regex and entropy in this rule have been tripped which means this line
  145. // contains a leak
  146. repo.Manager.SendLeaks(manager.Leak{
  147. Line: line,
  148. Offender: match,
  149. Commit: c.Hash.String(),
  150. Message: c.Message,
  151. Repo: repo.Name,
  152. Rule: rule.Description,
  153. Author: c.Author.Name,
  154. Email: c.Author.Email,
  155. Date: c.Author.When,
  156. Tags: strings.Join(rule.Tags, ", "),
  157. File: filename,
  158. })
  159. }
  160. }
  161. NEXTLINE:
  162. }
  163. return
  164. }
  165. if rule.Regex.String() == "" {
  166. continue
  167. }
  168. start := time.Now()
  169. locs := rule.Regex.FindAllIndex([]byte(content), -1)
  170. if len(locs) != 0 {
  171. // check if any rules are whitelisting this leak
  172. if len(rule.Whitelist) != 0 {
  173. for _, wl := range rule.Whitelist {
  174. if fileMatched(filename, wl.File) {
  175. // if matched, go to next rule
  176. goto NEXT
  177. }
  178. }
  179. }
  180. for _, loc := range locs {
  181. start := loc[0]
  182. end := loc[1]
  183. for start != 0 && content[start] != '\n' {
  184. start = start - 1
  185. }
  186. if start != 0 {
  187. // skip newline
  188. start = start + 1
  189. }
  190. for end < len(content)-1 && content[end] != '\n' {
  191. end = end + 1
  192. }
  193. offender := content[loc[0]:loc[1]]
  194. line := content[start:end]
  195. if len(rule.Whitelist) != 0 {
  196. for _, wl := range rule.Whitelist {
  197. if wl.Regex.FindString(line) != "" {
  198. goto NEXT
  199. }
  200. }
  201. }
  202. if repo.Manager.Opts.Redact {
  203. line = strings.ReplaceAll(line, offender, "REDACTED")
  204. offender = "REDACTED"
  205. }
  206. repo.Manager.SendLeaks(manager.Leak{
  207. Line: line,
  208. Offender: offender,
  209. Commit: c.Hash.String(),
  210. Message: c.Message,
  211. Repo: repo.Name,
  212. Rule: rule.Description,
  213. Author: c.Author.Name,
  214. Email: c.Author.Email,
  215. Date: c.Author.When,
  216. Tags: strings.Join(rule.Tags, ", "),
  217. File: filename,
  218. })
  219. }
  220. }
  221. repo.Manager.RecordTime(manager.RegexTime{
  222. Time: time.Now().Sub(start).Nanoseconds(),
  223. Regex: rule.Regex.String(),
  224. })
  225. NEXT:
  226. }
  227. }
  228. // inspectCommit accepts a commit object and a repo. This function is only called when the --commit=
  229. // option has been set. That option tells gitleaks to look only at a single commit and check the contents
  230. // of said commit. Similar to inspectPatch(), if the files contained in the commit are a binaries or if they are
  231. // whitelisted then those files will be skipped.
  232. func inspectCommit(c *object.Commit, repo *Repo) error {
  233. fIter, err := c.Files()
  234. if err != nil {
  235. return err
  236. }
  237. err = fIter.ForEach(func(f *object.File) error {
  238. bin, err := f.IsBinary()
  239. if bin {
  240. return nil
  241. } else if err != nil {
  242. return err
  243. }
  244. if fileMatched(f, repo.config.Whitelist.File) {
  245. log.Debugf("whitelisted file found, skipping audit of file: %s", f.Name)
  246. return nil
  247. }
  248. if fileMatched(f.Name, repo.config.FileRegex) {
  249. repo.Manager.SendLeaks(manager.Leak{
  250. Line: "N/A",
  251. Offender: f.Name,
  252. Commit: c.Hash.String(),
  253. Repo: repo.Name,
  254. Rule: "file regex matched" + repo.config.FileRegex.String(),
  255. Author: c.Author.Name,
  256. Email: c.Author.Email,
  257. Date: c.Author.When,
  258. File: f.Name,
  259. })
  260. }
  261. content, err := f.Contents()
  262. if err != nil {
  263. return err
  264. }
  265. InspectString(content, c, repo, f.Name)
  266. return nil
  267. })
  268. return err
  269. }
  270. // howManyThreads will return a number 1-GOMAXPROCS which is the number
  271. // of goroutines that will spawn during gitleaks execution
  272. func howManyThreads(threads int) int {
  273. maxThreads := runtime.GOMAXPROCS(0)
  274. if threads == 0 {
  275. return 1
  276. } else if threads > maxThreads {
  277. log.Warnf("%d threads set too high, setting to system max, %d", threads, maxThreads)
  278. return maxThreads
  279. }
  280. return threads
  281. }
  282. func isCommitWhiteListed(commitHash string, whitelistedCommits []string) bool {
  283. for _, hash := range whitelistedCommits {
  284. if commitHash == hash {
  285. return true
  286. }
  287. }
  288. return false
  289. }
  290. func fileMatched(f interface{}, re *regexp.Regexp) bool {
  291. if re == nil {
  292. return false
  293. }
  294. switch f.(type) {
  295. case nil:
  296. return false
  297. case string:
  298. if re.FindString(f.(string)) != "" {
  299. return true
  300. }
  301. return false
  302. case *object.File:
  303. if re.FindString(f.(*object.File).Name) != "" {
  304. return true
  305. }
  306. return false
  307. }
  308. return false
  309. }
  310. // getLogOptions determines what log options are used when iterating through commits.
  311. // It is similar to `git log {branch}`. Default behavior is to log ALL branches so
  312. // gitleaks gets the full git history.
  313. func getLogOptions(repo *Repo) (*git.LogOptions, error) {
  314. if repo.Manager.Opts.Branch != "" {
  315. var logOpts git.LogOptions
  316. refs, err := repo.Storer.IterReferences()
  317. if err != nil {
  318. return nil, err
  319. }
  320. err = refs.ForEach(func(ref *plumbing.Reference) error {
  321. if ref.Name().IsTag() {
  322. return nil
  323. }
  324. // check heads first
  325. if ref.Name().String() == "refs/heads/"+repo.Manager.Opts.Branch {
  326. logOpts = git.LogOptions{
  327. From: ref.Hash(),
  328. }
  329. return nil
  330. } else if ref.Name().String() == "refs/remotes/origin/"+repo.Manager.Opts.Branch {
  331. logOpts = git.LogOptions{
  332. From: ref.Hash(),
  333. }
  334. return nil
  335. }
  336. return nil
  337. })
  338. if logOpts.From.IsZero() {
  339. return nil, fmt.Errorf("could not find branch %s", repo.Manager.Opts.Branch)
  340. }
  341. return &logOpts, nil
  342. }
  343. return &git.LogOptions{All: true}, nil
  344. }
  345. // howLong accepts a time.Time object which is subtracted from time.Now() and
  346. // converted to nanoseconds which is returned
  347. func howLong(t time.Time) int64 {
  348. return time.Now().Sub(t).Nanoseconds()
  349. }