repo.go 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. package scan
  2. import (
  3. "sync"
  4. "github.com/zricethezav/gitleaks/v7/report"
  5. "github.com/go-git/go-git/v5"
  6. fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
  7. "github.com/go-git/go-git/v5/plumbing/object"
  8. "github.com/go-git/go-git/v5/plumbing/storer"
  9. log "github.com/sirupsen/logrus"
  10. )
  11. // RepoScanner is a repo scanner
  12. type RepoScanner struct {
  13. BaseScanner
  14. repo *git.Repository
  15. repoName string
  16. leakChan chan report.Leak
  17. leakWG *sync.WaitGroup
  18. leakCache map[string]bool
  19. leaks []report.Leak
  20. }
  21. // NewRepoScanner returns a new repo scanner (go figure). This function also
  22. // sets up the leak listener for multi-threaded awesomeness.
  23. func NewRepoScanner(base BaseScanner, repo *git.Repository) *RepoScanner {
  24. rs := &RepoScanner{
  25. BaseScanner: base,
  26. repo: repo,
  27. leakChan: make(chan report.Leak),
  28. leakWG: &sync.WaitGroup{},
  29. leakCache: make(map[string]bool),
  30. repoName: getRepoName(base.opts),
  31. }
  32. rs.scannerType = typeRepoScanner
  33. go rs.receiveLeaks()
  34. return rs
  35. }
  36. // Scan kicks of a repo scan
  37. func (rs *RepoScanner) Scan() (report.Report, error) {
  38. var scannerReport report.Report
  39. logOpts, err := logOptions(rs.repo, rs.opts)
  40. if err != nil {
  41. return scannerReport, err
  42. }
  43. cIter, err := rs.repo.Log(logOpts)
  44. if err != nil {
  45. return scannerReport, err
  46. }
  47. semaphore := make(chan bool, howManyThreads(rs.opts.Threads))
  48. wg := sync.WaitGroup{}
  49. err = cIter.ForEach(func(c *object.Commit) error {
  50. if c == nil || depthReached(scannerReport.Commits, rs.opts) {
  51. return storer.ErrStop
  52. }
  53. // Check if Commit is allowlisted
  54. if isCommitAllowListed(c.Hash.String(), rs.cfg.Allowlist.Commits) {
  55. return nil
  56. }
  57. // Check if at root
  58. if len(c.ParentHashes) == 0 {
  59. scannerReport.Commits++
  60. facScanner := NewFilesAtCommitScanner(rs.BaseScanner, rs.repo, c)
  61. facScanner.repoName = rs.repoName
  62. facReport, err := facScanner.Scan()
  63. if err != nil {
  64. return err
  65. }
  66. scannerReport.Leaks = append(scannerReport.Leaks, facReport.Leaks...)
  67. return nil
  68. }
  69. // inspect first parent only as all other parents will be eventually reached
  70. // (they exist as the tip of other branches, etc)
  71. // See https://github.com/zricethezav/gitleaks/issues/413 for details
  72. parent, err := c.Parent(0)
  73. if err != nil {
  74. return err
  75. }
  76. defer func() {
  77. if err := recover(); err != nil {
  78. // sometimes the Patch generation will fail due to a known bug in
  79. // sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
  80. // Once a fix has been merged I will remove this recover.
  81. return
  82. }
  83. }()
  84. if parent == nil {
  85. // shouldn't reach this point but just in case
  86. return nil
  87. }
  88. // start := time.Now()
  89. patch, err := parent.Patch(c)
  90. if err != nil {
  91. log.Errorf("could not generate Patch")
  92. }
  93. scannerReport.Commits++
  94. wg.Add(1)
  95. semaphore <- true
  96. go func(c *object.Commit, patch *object.Patch) {
  97. defer func() {
  98. <-semaphore
  99. wg.Done()
  100. }()
  101. // patchContent is used for searching for leak line number
  102. patchContent := patch.String()
  103. for _, f := range patch.FilePatches() {
  104. if f.IsBinary() {
  105. continue
  106. }
  107. for _, chunk := range f.Chunks() {
  108. if chunk.Type() == fdiff.Add {
  109. _, to := f.Files()
  110. lineLookup := make(map[string]bool)
  111. for _, leak := range checkRules(rs.BaseScanner, c, rs.repoName, to.Path(), chunk.Content()) {
  112. leak.LineNumber = extractLine(patchContent, leak, lineLookup)
  113. leak.LeakURL = leakURL(leak)
  114. if rs.opts.Verbose {
  115. logLeak(leak, rs.opts.Redact)
  116. }
  117. rs.leakWG.Add(1)
  118. rs.leakChan <- leak
  119. }
  120. }
  121. }
  122. }
  123. }(c, patch)
  124. if c.Hash.String() == rs.opts.CommitTo {
  125. return storer.ErrStop
  126. }
  127. return nil
  128. })
  129. wg.Wait()
  130. rs.leakWG.Wait()
  131. scannerReport.Leaks = append(scannerReport.Leaks, rs.leaks...)
  132. return scannerReport, nil
  133. }
  134. func (rs *RepoScanner) receiveLeaks() {
  135. for leak := range rs.leakChan {
  136. rs.leaks = append(rs.leaks, leak)
  137. rs.leakWG.Done()
  138. }
  139. }