utils.go 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. package gitleaks
  2. import (
  3. "encoding/csv"
  4. "encoding/json"
  5. "fmt"
  6. "io/ioutil"
  7. "os"
  8. "path"
  9. "strings"
  10. "time"
  11. log "github.com/sirupsen/logrus"
  12. )
  13. // writeReport writes a report to a file specified in the --report= option.
  14. // Default format for report is JSON. You can use the --csv option to write the report as a csv
  15. func writeReport(leaks []Leak) error {
  16. if len(leaks) == 0 {
  17. return nil
  18. }
  19. log.Infof("writing report to %s", opts.Report)
  20. if strings.HasSuffix(opts.Report, ".csv") {
  21. f, err := os.Create(opts.Report)
  22. if err != nil {
  23. return err
  24. }
  25. defer f.Close()
  26. w := csv.NewWriter(f)
  27. w.Write([]string{"repo", "line", "commit", "offender", "rule", "info", "tags", "severity", "commitMsg", "author", "email", "file", "date"})
  28. for _, leak := range leaks {
  29. w.Write([]string{leak.Repo, leak.Line, leak.Commit, leak.Offender, leak.Rule, leak.Info, leak.Tags, leak.Severity, leak.Message, leak.Author, leak.Email, leak.File, leak.Date.Format(time.RFC3339)})
  30. }
  31. w.Flush()
  32. } else {
  33. f, err := os.Create(opts.Report)
  34. if err != nil {
  35. return err
  36. }
  37. defer f.Close()
  38. encoder := json.NewEncoder(f)
  39. encoder.SetIndent("", "\t")
  40. if _, err := f.WriteString("[\n"); err != nil {
  41. return err
  42. }
  43. for i := 0; i < len(leaks); i++ {
  44. if err := encoder.Encode(leaks[i]); err != nil {
  45. return err
  46. }
  47. // for all but the last leak, seek back and overwrite the newline appended by Encode() with comma & newline
  48. if i+1 < len(leaks) {
  49. if _, err := f.Seek(-1, 1); err != nil {
  50. return err
  51. }
  52. if _, err := f.WriteString(",\n"); err != nil {
  53. return err
  54. }
  55. }
  56. }
  57. if _, err := f.WriteString("]"); err != nil {
  58. return err
  59. }
  60. if err := f.Sync(); err != nil {
  61. log.Error(err)
  62. return err
  63. }
  64. }
  65. return nil
  66. }
  67. // check rule will inspect a single line and return a leak if it encounters one
  68. func (rule *Rule) check(line string, commit *Commit) (*Leak, error) {
  69. var (
  70. match string
  71. fileMatch string
  72. entropy float64
  73. entropyWord string
  74. )
  75. for _, f := range rule.fileTypes {
  76. fileMatch = f.FindString(commit.filePath)
  77. if fileMatch != "" {
  78. break
  79. }
  80. }
  81. if fileMatch == "" && len(rule.fileTypes) != 0 {
  82. return nil, nil
  83. }
  84. if rule.entropies != nil {
  85. if rule.entropyROI == "word" {
  86. words := strings.Fields(line)
  87. for _, word := range words {
  88. _entropy := getShannonEntropy(word)
  89. for _, e := range rule.entropies {
  90. if _entropy > e.v1 && _entropy < e.v2 {
  91. entropy = _entropy
  92. entropyWord = word
  93. goto postEntropy
  94. }
  95. }
  96. }
  97. } else {
  98. _entropy := getShannonEntropy(line)
  99. for _, e := range rule.entropies {
  100. if _entropy > e.v1 && _entropy < e.v2 {
  101. entropy = _entropy
  102. entropyWord = line
  103. goto postEntropy
  104. }
  105. }
  106. }
  107. }
  108. postEntropy:
  109. if rule.regex != nil {
  110. match = rule.regex.FindString(line)
  111. }
  112. if match != "" && entropy != 0.0 {
  113. return newLeak(line, fmt.Sprintf("%s regex match and entropy met at %.2f", rule.regex.String(), entropy), entropyWord, rule, commit), nil
  114. } else if match != "" && rule.entropies == nil {
  115. return newLeak(line, fmt.Sprintf("%s regex match", rule.regex.String()), match, rule, commit), nil
  116. } else if entropy != 0.0 && rule.regex.String() == "" {
  117. return newLeak(line, fmt.Sprintf("entropy met at %.2f", entropy), entropyWord, rule, commit), nil
  118. }
  119. return nil, nil
  120. }
  121. // inspect will parse each line of the git diff's content against a set of regexes or
  122. // a set of regexes set by the config (see gitleaks.toml for example). This function
  123. // will skip lines that include a whitelisted regex. A list of leaks is returned.
  124. // If verbose mode (-v/--verbose) is set, then checkDiff will log leaks as they are discovered.
  125. func inspect(commit *Commit) []Leak {
  126. var leaks []Leak
  127. lines := strings.Split(commit.content, "\n")
  128. for _, line := range lines {
  129. for _, rule := range config.Rules {
  130. if isLineWhitelisted(line) {
  131. break
  132. }
  133. leak, err := rule.check(line, commit)
  134. if err != nil || leak == nil {
  135. continue
  136. }
  137. leaks = append(leaks, *leak)
  138. }
  139. }
  140. return leaks
  141. }
  142. // isLineWhitelisted returns true iff the line is matched by at least one of the whiteListRegexes.
  143. func isLineWhitelisted(line string) bool {
  144. for _, wRe := range config.WhiteList.regexes {
  145. whitelistMatch := wRe.FindString(line)
  146. if whitelistMatch != "" {
  147. return true
  148. }
  149. }
  150. return false
  151. }
  152. func newLeak(line string, info string, offender string, rule *Rule, commit *Commit) *Leak {
  153. leak := &Leak{
  154. Line: line,
  155. Commit: commit.sha,
  156. Offender: offender,
  157. Rule: rule.description,
  158. Info: info,
  159. Author: commit.author,
  160. Email: commit.email,
  161. File: commit.filePath,
  162. Repo: commit.repoName,
  163. Message: commit.message,
  164. Date: commit.date,
  165. Tags: strings.Join(rule.tags, ", "),
  166. Severity: rule.severity,
  167. }
  168. if opts.Redact {
  169. leak.Offender = "REDACTED"
  170. leak.Line = strings.Replace(line, offender, "REDACTED", -1)
  171. }
  172. if opts.Verbose {
  173. leak.log()
  174. }
  175. return leak
  176. }
  177. // discoverRepos walks all the children of `path`. If a child directory
  178. // contain a .git subdirectory then that repo will be added to the list of repos returned
  179. func discoverRepos(ownerPath string) ([]*Repo, error) {
  180. var (
  181. err error
  182. repoDs []*Repo
  183. )
  184. files, err := ioutil.ReadDir(ownerPath)
  185. if err != nil {
  186. return repoDs, err
  187. }
  188. for _, f := range files {
  189. repoPath := path.Join(ownerPath, f.Name())
  190. if f.IsDir() && containsGit(repoPath) {
  191. repoDs = append(repoDs, &Repo{
  192. name: f.Name(),
  193. path: repoPath,
  194. })
  195. }
  196. }
  197. return repoDs, err
  198. }
  199. func (leak Leak) log() {
  200. b, _ := json.MarshalIndent(leak, "", " ")
  201. fmt.Println(string(b))
  202. }
  203. func containsGit(repoPath string) bool {
  204. if _, err := os.Stat(repoPath); os.IsNotExist(err) {
  205. return false
  206. }
  207. return true
  208. }