unstaged.go 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. package scan
  2. import (
  3. "bytes"
  4. "fmt"
  5. "io"
  6. "os/exec"
  7. "path/filepath"
  8. "strings"
  9. "time"
  10. "github.com/zricethezav/gitleaks/v7/config"
  11. "github.com/zricethezav/gitleaks/v7/options"
  12. "github.com/go-git/go-git/v5"
  13. "github.com/go-git/go-git/v5/plumbing"
  14. "github.com/sergi/go-diff/diffmatchpatch"
  15. )
  16. // UnstagedScanner is an unstaged scanner. This is the scanner used when you don't provide program arguments
  17. // which will then scan your PWD. This scans unstaged changes in your repo.
  18. type UnstagedScanner struct {
  19. opts options.Options
  20. cfg config.Config
  21. repo *git.Repository
  22. repoName string
  23. }
  24. // NewUnstagedScanner returns an unstaged scanner
  25. func NewUnstagedScanner(opts options.Options, cfg config.Config, repo *git.Repository) *UnstagedScanner {
  26. us := &UnstagedScanner{
  27. opts: opts,
  28. cfg: cfg,
  29. repo: repo,
  30. repoName: getRepoName(opts),
  31. }
  32. return us
  33. }
  34. // Scan kicks off an unstaged scan. This will attempt to determine unstaged changes which are then scanned.
  35. func (us *UnstagedScanner) Scan() (Report, error) {
  36. var scannerReport Report
  37. r, err := us.repo.Head()
  38. if err == plumbing.ErrReferenceNotFound {
  39. wt, err := us.repo.Worktree()
  40. if err != nil {
  41. return scannerReport, err
  42. }
  43. status, err := wt.Status()
  44. if err != nil {
  45. return scannerReport, err
  46. }
  47. for fn := range status {
  48. workTreeBuf := bytes.NewBuffer(nil)
  49. workTreeFile, err := wt.Filesystem.Open(fn)
  50. if err != nil {
  51. continue
  52. }
  53. if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
  54. return scannerReport, err
  55. }
  56. lineNumber := 0
  57. for _, line := range strings.Split(workTreeBuf.String(), "\n") {
  58. lineNumber++
  59. for _, rule := range us.cfg.Rules {
  60. offender := rule.Inspect(line)
  61. if offender == "" {
  62. continue
  63. }
  64. if us.cfg.Allowlist.RegexAllowed(line) ||
  65. rule.AllowList.FileAllowed(filepath.Base(workTreeFile.Name())) ||
  66. rule.AllowList.PathAllowed(workTreeFile.Name()) {
  67. continue
  68. }
  69. if rule.File.String() != "" && !rule.HasFileLeak(filepath.Base(workTreeFile.Name())) {
  70. continue
  71. }
  72. if rule.Path.String() != "" && !rule.HasFilePathLeak(filepath.Base(workTreeFile.Name())) {
  73. continue
  74. }
  75. leak := NewLeak(line, offender, defaultLineNumber).WithCommit(emptyCommit())
  76. leak.File = workTreeFile.Name()
  77. leak.LineNumber = lineNumber
  78. leak.Repo = us.repoName
  79. leak.Rule = rule.Description
  80. leak.Tags = strings.Join(rule.Tags, ", ")
  81. if us.opts.Verbose {
  82. leak.Log(us.opts.Redact)
  83. }
  84. scannerReport.Leaks = append(scannerReport.Leaks, leak)
  85. }
  86. }
  87. }
  88. return scannerReport, nil
  89. } else if err != nil {
  90. return scannerReport, err
  91. }
  92. c, err := us.repo.CommitObject(r.Hash())
  93. if err != nil {
  94. return scannerReport, err
  95. }
  96. // Staged change so the Commit details do not yet exist. Insert empty defaults.
  97. c.Hash = plumbing.Hash{}
  98. c.Message = ""
  99. c.Author.Name = ""
  100. c.Author.Email = ""
  101. c.Author.When = time.Unix(0, 0).UTC()
  102. prevTree, err := c.Tree()
  103. if err != nil {
  104. return scannerReport, err
  105. }
  106. wt, err := us.repo.Worktree()
  107. if err != nil {
  108. return scannerReport, err
  109. }
  110. status, err := gitStatus(wt)
  111. if err != nil {
  112. return scannerReport, err
  113. }
  114. for fn, state := range status {
  115. var (
  116. prevFileContents string
  117. currFileContents string
  118. filename string
  119. )
  120. if state.Staging != git.Untracked {
  121. if state.Staging == git.Deleted {
  122. // file in staging has been deleted, aka it is not on the filesystem
  123. // so the contents of the file are ""
  124. currFileContents = ""
  125. } else {
  126. workTreeBuf := bytes.NewBuffer(nil)
  127. workTreeFile, err := wt.Filesystem.Open(fn)
  128. if err != nil {
  129. continue
  130. }
  131. if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
  132. return scannerReport, err
  133. }
  134. currFileContents = workTreeBuf.String()
  135. filename = workTreeFile.Name()
  136. }
  137. // get files at HEAD state
  138. prevFile, err := prevTree.File(fn)
  139. if err != nil {
  140. prevFileContents = ""
  141. } else {
  142. prevFileContents, err = prevFile.Contents()
  143. if err != nil {
  144. return scannerReport, err
  145. }
  146. if filename == "" {
  147. filename = prevFile.Name
  148. }
  149. }
  150. dmp := diffmatchpatch.New()
  151. diffs := dmp.DiffMain(prevFileContents, currFileContents, false)
  152. prettyDiff := diffPrettyText(diffs)
  153. var diffContents string
  154. for _, d := range diffs {
  155. if d.Type == diffmatchpatch.DiffInsert {
  156. diffContents += fmt.Sprintf("%s\n", d.Text)
  157. }
  158. }
  159. lineLookup := make(map[string]bool)
  160. for _, line := range strings.Split(diffContents, "\n") {
  161. for _, rule := range us.cfg.Rules {
  162. offender := rule.Inspect(line)
  163. if offender == "" {
  164. continue
  165. }
  166. if us.cfg.Allowlist.RegexAllowed(line) ||
  167. rule.AllowList.FileAllowed(filepath.Base(filename)) ||
  168. rule.AllowList.PathAllowed(filename) {
  169. continue
  170. }
  171. if rule.File.String() != "" && !rule.HasFileLeak(filepath.Base(filename)) {
  172. continue
  173. }
  174. if rule.Path.String() != "" && !rule.HasFilePathLeak(filepath.Base(filename)) {
  175. continue
  176. }
  177. leak := NewLeak(line, offender, defaultLineNumber).WithCommit(emptyCommit())
  178. leak.File = filename
  179. leak.LineNumber = extractLine(prettyDiff, leak, lineLookup) + 1
  180. leak.Repo = us.repoName
  181. leak.Rule = rule.Description
  182. leak.Tags = strings.Join(rule.Tags, ", ")
  183. if us.opts.Verbose {
  184. leak.Log(us.opts.Redact)
  185. }
  186. scannerReport.Leaks = append(scannerReport.Leaks, leak)
  187. }
  188. }
  189. }
  190. }
  191. return scannerReport, err
  192. }
  193. // DiffPrettyText converts a []Diff into a colored text report.
  194. // TODO open PR for this
  195. func diffPrettyText(diffs []diffmatchpatch.Diff) string {
  196. var buff bytes.Buffer
  197. for _, diff := range diffs {
  198. text := diff.Text
  199. switch diff.Type {
  200. case diffmatchpatch.DiffInsert:
  201. _, _ = buff.WriteString("+")
  202. _, _ = buff.WriteString(text)
  203. case diffmatchpatch.DiffDelete:
  204. _, _ = buff.WriteString("-")
  205. _, _ = buff.WriteString(text)
  206. case diffmatchpatch.DiffEqual:
  207. _, _ = buff.WriteString(" ")
  208. _, _ = buff.WriteString(text)
  209. }
  210. }
  211. return buff.String()
  212. }
  213. // gitStatus returns the status of modified files in the worktree. It will attempt to execute 'git status'
  214. // and will fall back to git.Worktree.Status() if that fails.
  215. func gitStatus(wt *git.Worktree) (git.Status, error) {
  216. c := exec.Command("git", "status", "--porcelain", "-z")
  217. c.Dir = wt.Filesystem.Root()
  218. output, err := c.Output()
  219. if err != nil {
  220. stat, err := wt.Status()
  221. return stat, err
  222. }
  223. lines := strings.Split(string(output), "\000")
  224. stat := make(map[string]*git.FileStatus, len(lines))
  225. for _, line := range lines {
  226. if len(line) == 0 {
  227. continue
  228. }
  229. // For copy/rename the output looks like
  230. // R destination\000source
  231. // Which means we can split on space and ignore anything with only one result
  232. parts := strings.SplitN(strings.TrimLeft(line, " "), " ", 2)
  233. if len(parts) == 2 {
  234. stat[strings.Trim(parts[1], " ")] = &git.FileStatus{
  235. Staging: git.StatusCode([]byte(parts[0])[0]),
  236. }
  237. }
  238. }
  239. return stat, err
  240. }