util.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. package audit
  2. import (
  3. "fmt"
  4. log "github.com/sirupsen/logrus"
  5. "github.com/zricethezav/gitleaks/config"
  6. "github.com/zricethezav/gitleaks/manager"
  7. "gopkg.in/src-d/go-git.v4"
  8. "gopkg.in/src-d/go-git.v4/plumbing"
  9. fdiff "gopkg.in/src-d/go-git.v4/plumbing/format/diff"
  10. "gopkg.in/src-d/go-git.v4/plumbing/object"
  11. "math"
  12. "path"
  13. "regexp"
  14. "runtime"
  15. "strings"
  16. "time"
  17. )
// maxLineLen is the length threshold, in bytes, above which InspectString shortens
// the offending line before reporting a leak for a rule that only defines entropy ranges.
  18. const maxLineLen = 200
  19. // Inspect patch accepts a patch, commit, and repo. If the patches contains files that are
  20. // binary, then gitleaks will skip auditing that file OR if a file is matched on
  21. // whitelisted files set in the configuration. If a global rule for files is defined and a filename
  22. // matches said global rule, then a laek is sent to the manager.
  23. // After that, file chunks are created which are then inspected by InspectString()
  24. func inspectPatch(patch *object.Patch, c *object.Commit, repo *Repo) {
  25. for _, f := range patch.FilePatches() {
  26. if f.IsBinary() {
  27. continue
  28. }
  29. if fileMatched(getFileName(f), repo.config.Whitelist.File) {
  30. log.Debugf("whitelisted file found, skipping audit of file: %s", getFileName(f))
  31. continue
  32. }
  33. if fileMatched(getFileName(f), repo.config.FileRegex) {
  34. repo.Manager.SendLeaks(manager.Leak{
  35. Line: "N/A",
  36. Offender: getFileName(f),
  37. Commit: c.Hash.String(),
  38. Repo: repo.Name,
  39. Rule: "file regex matched" + repo.config.FileRegex.String(),
  40. Author: c.Author.Name,
  41. Email: c.Author.Email,
  42. Date: c.Author.When,
  43. File: getFileName(f),
  44. })
  45. }
  46. for _, chunk := range f.Chunks() {
  47. if chunk.Type() == fdiff.Delete || chunk.Type() == fdiff.Add {
  48. InspectString(chunk.Content(), c, repo, getFileName(f))
  49. }
  50. }
  51. }
  52. }
  53. // getFileName accepts a file patch and returns the filename
  54. func getFileName(f fdiff.FilePatch) string {
  55. fn := "???"
  56. from, to := f.Files()
  57. if from != nil {
  58. return path.Base(from.Path())
  59. } else if to != nil {
  60. return path.Base(to.Path())
  61. }
  62. return fn
  63. }
  64. // getShannonEntropy https://en.wiktionary.org/wiki/Shannon_entropy
  65. func shannonEntropy(data string) (entropy float64) {
  66. if data == "" {
  67. return 0
  68. }
  69. charCounts := make(map[rune]int)
  70. for _, char := range data {
  71. charCounts[char]++
  72. }
  73. invLength := 1.0 / float64(len(data))
  74. for _, count := range charCounts {
  75. freq := float64(count) * invLength
  76. entropy -= freq * math.Log2(freq)
  77. }
  78. return entropy
  79. }
  80. // aws_access_key_id='AKIAIO5FODNN7EXAMPLE',
  81. // trippedEntropy checks if a given line falls in between entropy ranges supplied
  82. // by a custom gitleaks configuration. Gitleaks do not check entropy by default.
  83. func trippedEntropy(line string, rule config.Rule) bool {
  84. for _, e := range rule.Entropy {
  85. entropy := shannonEntropy(line)
  86. if entropy > e.P1 && entropy < e.P2 {
  87. return true
  88. }
  89. }
  90. return false
  91. }
  92. func ruleContainRegex(rule config.Rule) bool {
  93. if rule.Regex == nil {
  94. return false
  95. }
  96. if rule.Regex.String() == "" {
  97. return false
  98. }
  99. return true
  100. }
  101. // InspectString accepts a string, commit object, repo, and filename. This function iterates over
  102. // all the rules set by the gitleaks config. If the rule contains entropy checks then entropy will be checked first.
  103. // Next, if the rule contains a regular expression then that will be checked.
  104. func InspectString(content string, c *object.Commit, repo *Repo, filename string) {
  105. for _, rule := range repo.config.Rules {
  106. // check entropy
  107. if len(rule.Entropy) != 0 {
  108. // an optimization would be to switch the regex from FindAllIndex to FindString
  109. // since we are iterating on the lines if entropy rules exist...
  110. for _, line := range strings.Split(content, "\n") {
  111. entropyTripped := trippedEntropy(line, rule)
  112. if entropyTripped && !ruleContainRegex(rule) {
  113. _line := line
  114. if len(_line) > maxLineLen {
  115. _line = line[0 : maxLineLen-1]
  116. }
  117. repo.Manager.SendLeaks(manager.Leak{
  118. Line: _line,
  119. Offender: fmt.Sprintf("Entropy range %+v", rule.Entropy),
  120. Commit: c.Hash.String(),
  121. Repo: repo.Name,
  122. Message: c.Message,
  123. Rule: rule.Description,
  124. Author: c.Author.Name,
  125. Email: c.Author.Email,
  126. Date: c.Author.When,
  127. Tags: strings.Join(rule.Tags, ", "),
  128. File: filename,
  129. })
  130. } else if entropyTripped {
  131. // entropy has been tripped which means if there is a regex specified in the same
  132. // rule, we need to inspect the line for a regex match. In otherwords, the current rule has
  133. // both entropy and regex set which work in combination. This helps narrow down false positives
  134. // on searches for generic passwords in code.
  135. match := rule.Regex.FindString(line)
  136. // check if any rules are whitelisting this leak
  137. if len(rule.Whitelist) != 0 {
  138. for _, wl := range rule.Whitelist {
  139. if fileMatched(filename, wl.File) {
  140. // if matched, go to next rule
  141. goto NEXTLINE
  142. }
  143. if wl.Regex.FindString(line) != "" {
  144. goto NEXTLINE
  145. }
  146. }
  147. }
  148. if match != "" {
  149. // both the regex and entropy in this rule have been tripped which means this line
  150. // contains a leak
  151. repo.Manager.SendLeaks(manager.Leak{
  152. Line: line,
  153. Offender: match,
  154. Commit: c.Hash.String(),
  155. Message: c.Message,
  156. Repo: repo.Name,
  157. Rule: rule.Description,
  158. Author: c.Author.Name,
  159. Email: c.Author.Email,
  160. Date: c.Author.When,
  161. Tags: strings.Join(rule.Tags, ", "),
  162. File: filename,
  163. })
  164. }
  165. }
  166. NEXTLINE:
  167. }
  168. return
  169. }
  170. if rule.Regex.String() == "" {
  171. continue
  172. }
  173. start := time.Now()
  174. locs := rule.Regex.FindAllIndex([]byte(content), -1)
  175. if len(locs) != 0 {
  176. // check if any rules are whitelisting this leak
  177. if len(rule.Whitelist) != 0 {
  178. for _, wl := range rule.Whitelist {
  179. if fileMatched(filename, wl.File) {
  180. // if matched, go to next rule
  181. goto NEXT
  182. }
  183. }
  184. }
  185. for _, loc := range locs {
  186. start := loc[0]
  187. end := loc[1]
  188. for start != 0 && content[start] != '\n' {
  189. start = start - 1
  190. }
  191. if start != 0 {
  192. // skip newline
  193. start = start + 1
  194. }
  195. for end < len(content)-1 && content[end] != '\n' {
  196. end = end + 1
  197. }
  198. offender := content[loc[0]:loc[1]]
  199. line := content[start:end]
  200. if len(rule.Whitelist) != 0 {
  201. for _, wl := range rule.Whitelist {
  202. if wl.Regex.FindString(line) != "" {
  203. goto NEXT
  204. }
  205. }
  206. }
  207. if repo.Manager.Opts.Redact {
  208. line = strings.ReplaceAll(line, offender, "REDACTED")
  209. offender = "REDACTED"
  210. }
  211. repo.Manager.SendLeaks(manager.Leak{
  212. Line: line,
  213. Offender: offender,
  214. Commit: c.Hash.String(),
  215. Message: c.Message,
  216. Repo: repo.Name,
  217. Rule: rule.Description,
  218. Author: c.Author.Name,
  219. Email: c.Author.Email,
  220. Date: c.Author.When,
  221. Tags: strings.Join(rule.Tags, ", "),
  222. File: filename,
  223. })
  224. }
  225. }
  226. repo.Manager.RecordTime(manager.RegexTime{
  227. Time: time.Now().Sub(start).Nanoseconds(),
  228. Regex: rule.Regex.String(),
  229. })
  230. NEXT:
  231. }
  232. }
  233. // inspectCommit accepts a commit object and a repo. This function is only called when the --commit=
  234. // option has been set. That option tells gitleaks to look only at a single commit and check the contents
  235. // of said commit. Similar to inspectPatch(), if the files contained in the commit are a binaries or if they are
  236. // whitelisted then those files will be skipped.
  237. func inspectCommit(c *object.Commit, repo *Repo) error {
  238. fIter, err := c.Files()
  239. if err != nil {
  240. return err
  241. }
  242. err = fIter.ForEach(func(f *object.File) error {
  243. bin, err := f.IsBinary()
  244. if bin {
  245. return nil
  246. } else if err != nil {
  247. return err
  248. }
  249. if fileMatched(f, repo.config.Whitelist.File) {
  250. log.Debugf("whitelisted file found, skipping audit of file: %s", f.Name)
  251. return nil
  252. }
  253. if fileMatched(f.Name, repo.config.FileRegex) {
  254. repo.Manager.SendLeaks(manager.Leak{
  255. Line: "N/A",
  256. Offender: f.Name,
  257. Commit: c.Hash.String(),
  258. Repo: repo.Name,
  259. Rule: "file regex matched" + repo.config.FileRegex.String(),
  260. Author: c.Author.Name,
  261. Email: c.Author.Email,
  262. Date: c.Author.When,
  263. File: f.Name,
  264. })
  265. }
  266. content, err := f.Contents()
  267. if err != nil {
  268. return err
  269. }
  270. InspectString(content, c, repo, f.Name)
  271. return nil
  272. })
  273. return err
  274. }
  275. // howManyThreads will return a number 1-GOMAXPROCS which is the number
  276. // of goroutines that will spawn during gitleaks execution
  277. func howManyThreads(threads int) int {
  278. maxThreads := runtime.GOMAXPROCS(0)
  279. if threads == 0 {
  280. return 1
  281. } else if threads > maxThreads {
  282. log.Warnf("%d threads set too high, setting to system max, %d", threads, maxThreads)
  283. return maxThreads
  284. }
  285. return threads
  286. }
  287. func isCommitWhiteListed(commitHash string, whitelistedCommits []string) bool {
  288. for _, hash := range whitelistedCommits {
  289. if commitHash == hash {
  290. return true
  291. }
  292. }
  293. return false
  294. }
  295. func fileMatched(f interface{}, re *regexp.Regexp) bool {
  296. if re == nil {
  297. return false
  298. }
  299. switch f.(type) {
  300. case nil:
  301. return false
  302. case string:
  303. if re.FindString(f.(string)) != "" {
  304. return true
  305. }
  306. return false
  307. case *object.File:
  308. if re.FindString(f.(*object.File).Name) != "" {
  309. return true
  310. }
  311. return false
  312. }
  313. return false
  314. }
  315. // getLogOptions determines what log options are used when iterating through commits.
  316. // It is similar to `git log {branch}`. Default behavior is to log ALL branches so
  317. // gitleaks gets the full git history.
  318. func getLogOptions(repo *Repo) (*git.LogOptions, error) {
  319. if repo.Manager.Opts.Branch != "" {
  320. var logOpts git.LogOptions
  321. refs, err := repo.Storer.IterReferences()
  322. if err != nil {
  323. return nil, err
  324. }
  325. err = refs.ForEach(func(ref *plumbing.Reference) error {
  326. if ref.Name().IsTag() {
  327. return nil
  328. }
  329. // check heads first
  330. if ref.Name().String() == "refs/heads/"+repo.Manager.Opts.Branch {
  331. logOpts = git.LogOptions{
  332. From: ref.Hash(),
  333. }
  334. return nil
  335. } else if ref.Name().String() == "refs/remotes/origin/"+repo.Manager.Opts.Branch {
  336. logOpts = git.LogOptions{
  337. From: ref.Hash(),
  338. }
  339. return nil
  340. }
  341. return nil
  342. })
  343. if logOpts.From.IsZero() {
  344. return nil, fmt.Errorf("could not find branch %s", repo.Manager.Opts.Branch)
  345. }
  346. return &logOpts, nil
  347. }
  348. return &git.LogOptions{All: true}, nil
  349. }
  350. // howLong accepts a time.Time object which is subtracted from time.Now() and
  351. // converted to nanoseconds which is returned
  352. func howLong(t time.Time) int64 {
  353. return time.Now().Sub(t).Nanoseconds()
  354. }