unstaged.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. package scan
  2. import (
  3. "bytes"
  4. "fmt"
  5. "io"
  6. "os/exec"
  7. "strings"
  8. "time"
  9. "github.com/zricethezav/gitleaks/v7/report"
  10. "github.com/go-git/go-git/v5"
  11. "github.com/go-git/go-git/v5/plumbing"
  12. "github.com/sergi/go-diff/diffmatchpatch"
  13. )
  14. // UnstagedScanner is an unstaged scanner. This is the scanner used when you don't provide program arguments
  15. // which will then scan your PWD. This scans unstaged changes in your repo.
  16. type UnstagedScanner struct {
  17. BaseScanner
  18. repo *git.Repository
  19. repoName string
  20. }
  21. // NewUnstagedScanner returns an unstaged scanner
  22. func NewUnstagedScanner(base BaseScanner, repo *git.Repository) *UnstagedScanner {
  23. us := &UnstagedScanner{
  24. BaseScanner: base,
  25. repo: repo,
  26. repoName: getRepoName(base.opts),
  27. }
  28. us.scannerType = typeUnstagedScanner
  29. return us
  30. }
  31. // Scan kicks off an unstaged scan. This will attempt to determine unstaged changes which are then scanned.
  32. func (us *UnstagedScanner) Scan() (report.Report, error) {
  33. var scannerReport report.Report
  34. r, err := us.repo.Head()
  35. if err == plumbing.ErrReferenceNotFound {
  36. wt, err := us.repo.Worktree()
  37. if err != nil {
  38. return scannerReport, err
  39. }
  40. status, err := wt.Status()
  41. if err != nil {
  42. return scannerReport, err
  43. }
  44. for fn := range status {
  45. workTreeBuf := bytes.NewBuffer(nil)
  46. workTreeFile, err := wt.Filesystem.Open(fn)
  47. if err != nil {
  48. continue
  49. }
  50. if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
  51. return scannerReport, err
  52. }
  53. leaks := checkRules(us.BaseScanner, emptyCommit(), us.repoName, workTreeFile.Name(), workTreeBuf.String())
  54. for _, leak := range leaks {
  55. if us.opts.Verbose {
  56. logLeak(leak, us.opts.Redact)
  57. }
  58. scannerReport.Leaks = append(scannerReport.Leaks, leak)
  59. }
  60. }
  61. return scannerReport, nil
  62. } else if err != nil {
  63. return scannerReport, err
  64. }
  65. c, err := us.repo.CommitObject(r.Hash())
  66. if err != nil {
  67. return scannerReport, err
  68. }
  69. // Staged change so the Commit details do not yet exist. Insert empty defaults.
  70. c.Hash = plumbing.Hash{}
  71. c.Message = ""
  72. c.Author.Name = ""
  73. c.Author.Email = ""
  74. c.Author.When = time.Unix(0, 0).UTC()
  75. prevTree, err := c.Tree()
  76. if err != nil {
  77. return scannerReport, err
  78. }
  79. wt, err := us.repo.Worktree()
  80. if err != nil {
  81. return scannerReport, err
  82. }
  83. status, err := gitStatus(wt)
  84. if err != nil {
  85. return scannerReport, err
  86. }
  87. for fn, state := range status {
  88. var (
  89. prevFileContents string
  90. currFileContents string
  91. filename string
  92. )
  93. if state.Staging != git.Untracked {
  94. if state.Staging == git.Deleted {
  95. // file in staging has been deleted, aka it is not on the filesystem
  96. // so the contents of the file are ""
  97. currFileContents = ""
  98. } else {
  99. workTreeBuf := bytes.NewBuffer(nil)
  100. workTreeFile, err := wt.Filesystem.Open(fn)
  101. if err != nil {
  102. continue
  103. }
  104. if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
  105. return scannerReport, err
  106. }
  107. currFileContents = workTreeBuf.String()
  108. filename = workTreeFile.Name()
  109. }
  110. // get files at HEAD state
  111. prevFile, err := prevTree.File(fn)
  112. if err != nil {
  113. prevFileContents = ""
  114. } else {
  115. prevFileContents, err = prevFile.Contents()
  116. if err != nil {
  117. return scannerReport, err
  118. }
  119. if filename == "" {
  120. filename = prevFile.Name
  121. }
  122. }
  123. dmp := diffmatchpatch.New()
  124. diffs := dmp.DiffMain(prevFileContents, currFileContents, false)
  125. prettyDiff := diffPrettyText(diffs)
  126. var diffContents string
  127. for _, d := range diffs {
  128. if d.Type == diffmatchpatch.DiffInsert {
  129. diffContents += fmt.Sprintf("%s\n", d.Text)
  130. }
  131. }
  132. leaks := checkRules(us.BaseScanner, c, us.repoName, filename, diffContents)
  133. lineLookup := make(map[string]bool)
  134. for _, leak := range leaks {
  135. for lineNumber, line := range strings.Split(prettyDiff, "\n") {
  136. if strings.HasPrefix(line, diffAddPrefix) && strings.Contains(line, leak.Line) {
  137. if _, ok := lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, lineNumber, leak.File)]; !ok {
  138. lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, lineNumber, leak.File)] = true
  139. leak.LineNumber = lineNumber + 1
  140. if us.opts.Verbose {
  141. logLeak(leak, us.opts.Redact)
  142. }
  143. scannerReport.Leaks = append(scannerReport.Leaks, leak)
  144. break
  145. }
  146. }
  147. }
  148. }
  149. }
  150. }
  151. return scannerReport, err
  152. }
  153. // DiffPrettyText converts a []Diff into a colored text report.
  154. // TODO open PR for this
  155. func diffPrettyText(diffs []diffmatchpatch.Diff) string {
  156. var buff bytes.Buffer
  157. for _, diff := range diffs {
  158. text := diff.Text
  159. switch diff.Type {
  160. case diffmatchpatch.DiffInsert:
  161. _, _ = buff.WriteString("+")
  162. _, _ = buff.WriteString(text)
  163. case diffmatchpatch.DiffDelete:
  164. _, _ = buff.WriteString("-")
  165. _, _ = buff.WriteString(text)
  166. case diffmatchpatch.DiffEqual:
  167. _, _ = buff.WriteString(text)
  168. }
  169. }
  170. return buff.String()
  171. }
  172. // gitStatus returns the status of modified files in the worktree. It will attempt to execute 'git status'
  173. // and will fall back to git.Worktree.Status() if that fails.
  174. func gitStatus(wt *git.Worktree) (git.Status, error) {
  175. c := exec.Command("git", "status", "--porcelain", "-z")
  176. c.Dir = wt.Filesystem.Root()
  177. output, err := c.Output()
  178. if err != nil {
  179. stat, err := wt.Status()
  180. return stat, err
  181. }
  182. lines := strings.Split(string(output), "\000")
  183. stat := make(map[string]*git.FileStatus, len(lines))
  184. for _, line := range lines {
  185. if len(line) == 0 {
  186. continue
  187. }
  188. // For copy/rename the output looks like
  189. // R destination\000source
  190. // Which means we can split on space and ignore anything with only one result
  191. parts := strings.SplitN(strings.TrimLeft(line, " "), " ", 2)
  192. if len(parts) == 2 {
  193. stat[strings.Trim(parts[1], " ")] = &git.FileStatus{
  194. Staging: git.StatusCode([]byte(parts[0])[0]),
  195. }
  196. }
  197. }
  198. return stat, err
  199. }