unstaged.go 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. package scan
  2. import (
  3. "bytes"
  4. "fmt"
  5. "io"
  6. "os/exec"
  7. "path/filepath"
  8. "strings"
  9. "time"
  10. "github.com/zricethezav/gitleaks/v7/config"
  11. "github.com/zricethezav/gitleaks/v7/options"
  12. "github.com/go-git/go-git/v5"
  13. "github.com/go-git/go-git/v5/plumbing"
  14. "github.com/sergi/go-diff/diffmatchpatch"
  15. )
  16. // UnstagedScanner is an unstaged scanner. This is the scanner used when you don't provide program arguments
  17. // which will then scan your PWD. This scans unstaged changes in your repo.
  18. type UnstagedScanner struct {
  19. opts options.Options
  20. cfg config.Config
  21. repo *git.Repository
  22. repoName string
  23. }
  24. // NewUnstagedScanner returns an unstaged scanner
  25. func NewUnstagedScanner(opts options.Options, cfg config.Config, repo *git.Repository) *UnstagedScanner {
  26. us := &UnstagedScanner{
  27. opts: opts,
  28. cfg: cfg,
  29. repo: repo,
  30. repoName: getRepoName(opts),
  31. }
  32. return us
  33. }
  34. // Scan kicks off an unstaged scan. This will attempt to determine unstaged changes which are then scanned.
  35. func (us *UnstagedScanner) Scan() (Report, error) {
  36. var scannerReport Report
  37. r, err := us.repo.Head()
  38. if err == plumbing.ErrReferenceNotFound {
  39. wt, err := us.repo.Worktree()
  40. if err != nil {
  41. return scannerReport, err
  42. }
  43. status, err := wt.Status()
  44. if err != nil {
  45. return scannerReport, err
  46. }
  47. for fn := range status {
  48. workTreeBuf := bytes.NewBuffer(nil)
  49. workTreeFile, err := wt.Filesystem.Open(fn)
  50. if err != nil {
  51. continue
  52. }
  53. // Check if file is allow listed
  54. if us.cfg.Allowlist.FileAllowed(filepath.Base(fn)) ||
  55. us.cfg.Allowlist.PathAllowed(fn) {
  56. continue
  57. }
  58. // Check individual file path ONLY rules
  59. for _, rule := range us.cfg.Rules {
  60. if rule.HasFileOrPathLeakOnly(fn) {
  61. leak := NewLeak("", "Filename or path offender: "+fn, defaultLineNumber)
  62. leak.Repo = us.repoName
  63. leak.File = fn
  64. leak.RepoURL = us.opts.RepoURL
  65. leak.LeakURL = leak.URL()
  66. leak.Rule = rule.Description
  67. leak.Tags = strings.Join(rule.Tags, ", ")
  68. leak.Log(us.opts)
  69. scannerReport.Leaks = append(scannerReport.Leaks, leak)
  70. continue
  71. }
  72. }
  73. if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
  74. return scannerReport, err
  75. }
  76. lineNumber := 0
  77. for _, line := range strings.Split(workTreeBuf.String(), "\n") {
  78. lineNumber++
  79. for _, rule := range us.cfg.Rules {
  80. offender := rule.Inspect(line)
  81. if offender == "" {
  82. continue
  83. }
  84. if us.cfg.Allowlist.RegexAllowed(line) ||
  85. rule.AllowList.FileAllowed(filepath.Base(workTreeFile.Name())) ||
  86. rule.AllowList.PathAllowed(workTreeFile.Name()) {
  87. continue
  88. }
  89. if rule.File.String() != "" && !rule.HasFileLeak(filepath.Base(workTreeFile.Name())) {
  90. continue
  91. }
  92. if rule.Path.String() != "" && !rule.HasFilePathLeak(filepath.Base(workTreeFile.Name())) {
  93. continue
  94. }
  95. leak := NewLeak(line, offender, defaultLineNumber).WithCommit(emptyCommit())
  96. leak.File = workTreeFile.Name()
  97. leak.LineNumber = lineNumber
  98. leak.Repo = us.repoName
  99. leak.Rule = rule.Description
  100. leak.Tags = strings.Join(rule.Tags, ", ")
  101. if us.opts.Verbose {
  102. leak.Log(us.opts)
  103. }
  104. scannerReport.Leaks = append(scannerReport.Leaks, leak)
  105. }
  106. }
  107. }
  108. return scannerReport, nil
  109. } else if err != nil {
  110. return scannerReport, err
  111. }
  112. c, err := us.repo.CommitObject(r.Hash())
  113. if err != nil {
  114. return scannerReport, err
  115. }
  116. // Staged change so the Commit details do not yet exist. Insert empty defaults.
  117. c.Hash = plumbing.Hash{}
  118. c.Message = ""
  119. c.Author.Name = ""
  120. c.Author.Email = ""
  121. c.Author.When = time.Unix(0, 0).UTC()
  122. prevTree, err := c.Tree()
  123. if err != nil {
  124. return scannerReport, err
  125. }
  126. wt, err := us.repo.Worktree()
  127. if err != nil {
  128. return scannerReport, err
  129. }
  130. status, err := gitStatus(wt)
  131. if err != nil {
  132. return scannerReport, err
  133. }
  134. for fn, state := range status {
  135. var (
  136. prevFileContents string
  137. currFileContents string
  138. filename string
  139. )
  140. if state.Staging != git.Untracked {
  141. if state.Staging == git.Deleted {
  142. // file in staging has been deleted, aka it is not on the filesystem
  143. // so the contents of the file are ""
  144. currFileContents = ""
  145. } else {
  146. workTreeBuf := bytes.NewBuffer(nil)
  147. workTreeFile, err := wt.Filesystem.Open(fn)
  148. if err != nil {
  149. continue
  150. }
  151. if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
  152. return scannerReport, err
  153. }
  154. currFileContents = workTreeBuf.String()
  155. filename = workTreeFile.Name()
  156. }
  157. // get files at HEAD state
  158. prevFile, err := prevTree.File(fn)
  159. if err != nil {
  160. prevFileContents = ""
  161. } else {
  162. prevFileContents, err = prevFile.Contents()
  163. if err != nil {
  164. return scannerReport, err
  165. }
  166. if filename == "" {
  167. filename = prevFile.Name
  168. }
  169. }
  170. dmp := diffmatchpatch.New()
  171. diffs := dmp.DiffMain(prevFileContents, currFileContents, false)
  172. prettyDiff := diffPrettyText(diffs)
  173. var diffContents string
  174. for _, d := range diffs {
  175. if d.Type == diffmatchpatch.DiffInsert {
  176. diffContents += fmt.Sprintf("%s\n", d.Text)
  177. }
  178. }
  179. lineLookup := make(map[string]bool)
  180. for _, line := range strings.Split(diffContents, "\n") {
  181. for _, rule := range us.cfg.Rules {
  182. offender := rule.Inspect(line)
  183. if offender == "" {
  184. continue
  185. }
  186. if us.cfg.Allowlist.RegexAllowed(line) ||
  187. rule.AllowList.FileAllowed(filepath.Base(filename)) ||
  188. rule.AllowList.PathAllowed(filename) {
  189. continue
  190. }
  191. if rule.File.String() != "" && !rule.HasFileLeak(filepath.Base(filename)) {
  192. continue
  193. }
  194. if rule.Path.String() != "" && !rule.HasFilePathLeak(filepath.Base(filename)) {
  195. continue
  196. }
  197. leak := NewLeak(line, offender, defaultLineNumber).WithCommit(emptyCommit())
  198. leak.File = filename
  199. leak.LineNumber = extractLine(prettyDiff, leak, lineLookup) + 1
  200. leak.Repo = us.repoName
  201. leak.Rule = rule.Description
  202. leak.Tags = strings.Join(rule.Tags, ", ")
  203. leak.Log(us.opts)
  204. scannerReport.Leaks = append(scannerReport.Leaks, leak)
  205. }
  206. }
  207. }
  208. }
  209. return scannerReport, err
  210. }
  211. // DiffPrettyText converts a []Diff into a colored text report.
  212. // TODO open PR for this
  213. func diffPrettyText(diffs []diffmatchpatch.Diff) string {
  214. var buff bytes.Buffer
  215. for _, diff := range diffs {
  216. text := diff.Text
  217. switch diff.Type {
  218. case diffmatchpatch.DiffInsert:
  219. _, _ = buff.WriteString("+")
  220. _, _ = buff.WriteString(text)
  221. case diffmatchpatch.DiffDelete:
  222. _, _ = buff.WriteString("-")
  223. _, _ = buff.WriteString(text)
  224. case diffmatchpatch.DiffEqual:
  225. _, _ = buff.WriteString(" ")
  226. _, _ = buff.WriteString(text)
  227. }
  228. }
  229. return buff.String()
  230. }
  231. // gitStatus returns the status of modified files in the worktree. It will attempt to execute 'git status'
  232. // and will fall back to git.Worktree.Status() if that fails.
  233. func gitStatus(wt *git.Worktree) (git.Status, error) {
  234. c := exec.Command("git", "status", "--porcelain", "-z")
  235. c.Dir = wt.Filesystem.Root()
  236. output, err := c.Output()
  237. if err != nil {
  238. stat, err := wt.Status()
  239. return stat, err
  240. }
  241. lines := strings.Split(string(output), "\000")
  242. stat := make(map[string]*git.FileStatus, len(lines))
  243. for _, line := range lines {
  244. if len(line) == 0 {
  245. continue
  246. }
  247. // For copy/rename the output looks like
  248. // R destination\000source
  249. // Which means we can split on space and ignore anything with only one result
  250. parts := strings.SplitN(strings.TrimLeft(line, " "), " ", 2)
  251. if len(parts) == 2 {
  252. stat[strings.Trim(parts[1], " ")] = &git.FileStatus{
  253. Staging: git.StatusCode([]byte(parts[0])[0]),
  254. }
  255. }
  256. }
  257. return stat, err
  258. }