git.go 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. package detect
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "net/url"
  7. "os"
  8. "os/exec"
  9. "path/filepath"
  10. "regexp"
  11. "strings"
  12. "time"
  13. "github.com/gitleaks/go-gitdiff/gitdiff"
  14. "github.com/zricethezav/gitleaks/v8/cmd/scm"
  15. "github.com/zricethezav/gitleaks/v8/logging"
  16. "github.com/zricethezav/gitleaks/v8/report"
  17. "github.com/zricethezav/gitleaks/v8/sources"
  18. )
  19. func (d *Detector) DetectGit(cmd *sources.GitCmd, remote *RemoteInfo) ([]report.Finding, error) {
  20. defer cmd.Wait()
  21. var (
  22. diffFilesCh = cmd.DiffFilesCh()
  23. errCh = cmd.ErrCh()
  24. )
  25. // loop to range over both DiffFiles (stdout) and ErrCh (stderr)
  26. for diffFilesCh != nil || errCh != nil {
  27. select {
  28. case gitdiffFile, open := <-diffFilesCh:
  29. if !open {
  30. diffFilesCh = nil
  31. break
  32. }
  33. commitSHA := ""
  34. if gitdiffFile.PatchHeader != nil {
  35. commitSHA = gitdiffFile.PatchHeader.SHA
  36. for _, a := range d.Config.Allowlists {
  37. if ok, c := a.CommitAllowed(gitdiffFile.PatchHeader.SHA); ok {
  38. logging.Trace().Str("allowed-commit", c).Msg("skipping commit: global allowlist")
  39. continue
  40. }
  41. }
  42. }
  43. if isArchive(gitdiffFile.NewName) {
  44. // Check if commit is allowed
  45. d.Sema.Go(func() error {
  46. // Check out the archive blob to disk
  47. archivePath, err := cmd.CheckoutBlob(commitSHA, gitdiffFile.NewName)
  48. if err != nil {
  49. logging.Warn().Err(err).Str("file", gitdiffFile.NewName).Msg("failed to checkout blob")
  50. return nil
  51. }
  52. defer os.Remove(archivePath)
  53. targets, tmpDir, err := extractArchive(archivePath)
  54. if err != nil {
  55. os.RemoveAll(tmpDir)
  56. logging.Warn().Err(err).Msg("failed to extract archive")
  57. return nil
  58. }
  59. // Scan each extracted file just as you would in directory mode
  60. for _, t := range targets {
  61. // build the “inside-archive” path
  62. rel, _ := filepath.Rel(tmpDir, t.Path)
  63. rel = filepath.ToSlash(rel)
  64. // chain onto any existing VirtualPath (nested archives)
  65. if t.VirtualPath != "" {
  66. t.VirtualPath = t.VirtualPath + "/" + rel
  67. } else {
  68. t.VirtualPath = filepath.Base(gitdiffFile.NewName) + "/" + rel
  69. }
  70. // TODO this isn't a great solution, and it would be nice to
  71. // have a better way to handle this.
  72. // update taget to include git information:
  73. t.Source = "github-archive"
  74. t.GitInfo.Author = gitdiffFile.PatchHeader.Author.Name
  75. t.GitInfo.Commit = commitSHA
  76. t.GitInfo.Date = gitdiffFile.PatchHeader.AuthorDate.UTC().Format(time.RFC3339)
  77. t.GitInfo.Message = gitdiffFile.PatchHeader.Message()
  78. t.GitInfo.Email = gitdiffFile.PatchHeader.Author.Email
  79. d.detectScanTarget(t)
  80. }
  81. os.RemoveAll(tmpDir)
  82. return nil
  83. })
  84. }
  85. // skip binary files
  86. if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
  87. continue
  88. }
  89. d.addCommit(commitSHA)
  90. d.Sema.Go(func() error {
  91. for _, textFragment := range gitdiffFile.TextFragments {
  92. if textFragment == nil {
  93. return nil
  94. }
  95. fragment := Fragment{
  96. Raw: textFragment.Raw(gitdiff.OpAdd),
  97. CommitSHA: commitSHA,
  98. FilePath: gitdiffFile.NewName,
  99. }
  100. timer := time.AfterFunc(SlowWarningThreshold, func() {
  101. logging.Debug().
  102. Str("commit", commitSHA[:7]).
  103. Str("path", fragment.FilePath).
  104. Msgf("Taking longer than %s to inspect fragment", SlowWarningThreshold.String())
  105. })
  106. for _, finding := range d.Detect(fragment) {
  107. d.AddFinding(augmentGitFinding(remote, finding, textFragment, gitdiffFile))
  108. }
  109. if timer != nil {
  110. timer.Stop()
  111. timer = nil
  112. }
  113. }
  114. return nil
  115. })
  116. case err, open := <-errCh:
  117. if !open {
  118. errCh = nil
  119. break
  120. }
  121. return d.findings, err
  122. }
  123. }
  124. if err := d.Sema.Wait(); err != nil {
  125. return d.findings, err
  126. }
  127. logging.Info().Msgf("%d commits scanned.", len(d.commitMap))
  128. logging.Debug().Msg("Note: this number might be smaller than expected due to commits with no additions")
  129. return d.findings, nil
  130. }
  131. type RemoteInfo struct {
  132. Platform scm.Platform
  133. Url string
  134. }
  135. func NewRemoteInfo(platform scm.Platform, source string) *RemoteInfo {
  136. if platform == scm.NoPlatform {
  137. return &RemoteInfo{Platform: platform}
  138. }
  139. remoteUrl, err := getRemoteUrl(source)
  140. if err != nil {
  141. if strings.Contains(err.Error(), "No remote configured") {
  142. logging.Debug().Msg("skipping finding links: repository has no configured remote.")
  143. platform = scm.NoPlatform
  144. } else {
  145. logging.Error().Err(err).Msg("skipping finding links: unable to parse remote URL")
  146. }
  147. goto End
  148. }
  149. if platform == scm.UnknownPlatform {
  150. platform = platformFromHost(remoteUrl)
  151. if platform == scm.UnknownPlatform {
  152. logging.Info().
  153. Str("host", remoteUrl.Hostname()).
  154. Msg("Unknown SCM platform. Use --platform to include links in findings.")
  155. } else {
  156. logging.Debug().
  157. Str("host", remoteUrl.Hostname()).
  158. Str("platform", platform.String()).
  159. Msg("SCM platform parsed from host")
  160. }
  161. }
  162. End:
  163. var rUrl string
  164. if remoteUrl != nil {
  165. rUrl = remoteUrl.String()
  166. }
  167. return &RemoteInfo{
  168. Platform: platform,
  169. Url: rUrl,
  170. }
  171. }
  172. var sshUrlpat = regexp.MustCompile(`^git@([a-zA-Z0-9.-]+):([\w/.-]+?)(?:\.git)?$`)
  173. func getRemoteUrl(source string) (*url.URL, error) {
  174. // This will return the first remote — typically, "origin".
  175. cmd := exec.Command("git", "ls-remote", "--quiet", "--get-url")
  176. if source != "." {
  177. cmd.Dir = source
  178. }
  179. stdout, err := cmd.Output()
  180. if err != nil {
  181. var exitError *exec.ExitError
  182. if errors.As(err, &exitError) {
  183. return nil, fmt.Errorf("command failed (%d): %w, stderr: %s", exitError.ExitCode(), err, string(bytes.TrimSpace(exitError.Stderr)))
  184. }
  185. return nil, err
  186. }
  187. remoteUrl := string(bytes.TrimSpace(stdout))
  188. if matches := sshUrlpat.FindStringSubmatch(remoteUrl); matches != nil {
  189. remoteUrl = fmt.Sprintf("https://%s/%s", matches[1], matches[2])
  190. }
  191. remoteUrl = strings.TrimSuffix(remoteUrl, ".git")
  192. parsedUrl, err := url.Parse(remoteUrl)
  193. if err != nil {
  194. return nil, fmt.Errorf("unable to parse remote URL: %w", err)
  195. }
  196. // Remove any user info.
  197. parsedUrl.User = nil
  198. return parsedUrl, nil
  199. }
  200. func platformFromHost(u *url.URL) scm.Platform {
  201. switch strings.ToLower(u.Hostname()) {
  202. case "github.com":
  203. return scm.GitHubPlatform
  204. case "gitlab.com":
  205. return scm.GitLabPlatform
  206. case "dev.azure.com", "visualstudio.com":
  207. return scm.AzureDevOpsPlatform
  208. default:
  209. return scm.UnknownPlatform
  210. }
  211. }