git.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. package detect
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "net/url"
  7. "os/exec"
  8. "regexp"
  9. "strings"
  10. "github.com/gitleaks/go-gitdiff/gitdiff"
  11. "github.com/zricethezav/gitleaks/v8/cmd/scm"
  12. "github.com/zricethezav/gitleaks/v8/logging"
  13. "github.com/zricethezav/gitleaks/v8/report"
  14. "github.com/zricethezav/gitleaks/v8/sources"
  15. )
  16. func (d *Detector) DetectGit(cmd *sources.GitCmd, remote *RemoteInfo) ([]report.Finding, error) {
  17. defer cmd.Wait()
  18. var (
  19. diffFilesCh = cmd.DiffFilesCh()
  20. errCh = cmd.ErrCh()
  21. )
  22. // loop to range over both DiffFiles (stdout) and ErrCh (stderr)
  23. for diffFilesCh != nil || errCh != nil {
  24. select {
  25. case gitdiffFile, open := <-diffFilesCh:
  26. if !open {
  27. diffFilesCh = nil
  28. break
  29. }
  30. // skip binary files
  31. if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
  32. continue
  33. }
  34. // Check if commit is allowed
  35. commitSHA := ""
  36. if gitdiffFile.PatchHeader != nil {
  37. commitSHA = gitdiffFile.PatchHeader.SHA
  38. if ok, c := d.Config.Allowlist.CommitAllowed(gitdiffFile.PatchHeader.SHA); ok {
  39. logging.Trace().Str("allowed-commit", c).Msg("skipping commit: global allowlist")
  40. continue
  41. }
  42. }
  43. d.addCommit(commitSHA)
  44. d.Sema.Go(func() error {
  45. for _, textFragment := range gitdiffFile.TextFragments {
  46. if textFragment == nil {
  47. return nil
  48. }
  49. fragment := Fragment{
  50. Raw: textFragment.Raw(gitdiff.OpAdd),
  51. CommitSHA: commitSHA,
  52. FilePath: gitdiffFile.NewName,
  53. }
  54. for _, finding := range d.Detect(fragment) {
  55. d.AddFinding(augmentGitFinding(remote, finding, textFragment, gitdiffFile))
  56. }
  57. }
  58. return nil
  59. })
  60. case err, open := <-errCh:
  61. if !open {
  62. errCh = nil
  63. break
  64. }
  65. return d.findings, err
  66. }
  67. }
  68. if err := d.Sema.Wait(); err != nil {
  69. return d.findings, err
  70. }
  71. logging.Info().Msgf("%d commits scanned.", len(d.commitMap))
  72. logging.Debug().Msg("Note: this number might be smaller than expected due to commits with no additions")
  73. return d.findings, nil
  74. }
// RemoteInfo pairs an SCM platform with the URL of a repository's remote.
// Values are built by NewRemoteInfo.
type RemoteInfo struct {
	// Platform is the SCM platform associated with the remote.
	Platform scm.Platform
	// Url is the remote URL in string form. NewRemoteInfo leaves it empty
	// when the platform is scm.NoPlatform or the URL could not be obtained.
	Url string
}
  79. func NewRemoteInfo(platform scm.Platform, source string) *RemoteInfo {
  80. if platform == scm.NoPlatform {
  81. return &RemoteInfo{Platform: platform}
  82. }
  83. remoteUrl, err := getRemoteUrl(source)
  84. if err != nil {
  85. if strings.Contains(err.Error(), "No remote configured") {
  86. logging.Debug().Msg("skipping finding links: repository has no configured remote.")
  87. platform = scm.NoPlatform
  88. } else {
  89. logging.Error().Err(err).Msg("skipping finding links: unable to parse remote URL")
  90. }
  91. goto End
  92. }
  93. if platform == scm.UnknownPlatform {
  94. platform = platformFromHost(remoteUrl)
  95. if platform == scm.UnknownPlatform {
  96. logging.Info().
  97. Str("host", remoteUrl.Hostname()).
  98. Msg("Unknown SCM platform. Use --platform to include links in findings.")
  99. } else {
  100. logging.Debug().
  101. Str("host", remoteUrl.Hostname()).
  102. Str("platform", platform.String()).
  103. Msg("SCM platform parsed from host")
  104. }
  105. }
  106. End:
  107. var rUrl string
  108. if remoteUrl != nil {
  109. rUrl = remoteUrl.String()
  110. }
  111. return &RemoteInfo{
  112. Platform: platform,
  113. Url: rUrl,
  114. }
  115. }
// sshUrlpat matches remote URLs of the form "git@<host>:<path>" with an
// optional trailing ".git". Submatch 1 is the host and submatch 2 is the
// path with the ".git" suffix excluded.
var sshUrlpat = regexp.MustCompile(`^git@([a-zA-Z0-9.-]+):([\w/.-]+?)(?:\.git)?$`)
  117. func getRemoteUrl(source string) (*url.URL, error) {
  118. // This will return the first remote — typically, "origin".
  119. cmd := exec.Command("git", "ls-remote", "--quiet", "--get-url")
  120. if source != "." {
  121. cmd.Dir = source
  122. }
  123. stdout, err := cmd.Output()
  124. if err != nil {
  125. var exitError *exec.ExitError
  126. if errors.As(err, &exitError) {
  127. return nil, fmt.Errorf("command failed (%d): %w, stderr: %s", exitError.ExitCode(), err, string(bytes.TrimSpace(exitError.Stderr)))
  128. }
  129. return nil, err
  130. }
  131. remoteUrl := string(bytes.TrimSpace(stdout))
  132. if matches := sshUrlpat.FindStringSubmatch(remoteUrl); matches != nil {
  133. remoteUrl = fmt.Sprintf("https://%s/%s", matches[1], matches[2])
  134. }
  135. remoteUrl = strings.TrimSuffix(remoteUrl, ".git")
  136. parsedUrl, err := url.Parse(remoteUrl)
  137. if err != nil {
  138. return nil, fmt.Errorf("unable to parse remote URL: %w", err)
  139. }
  140. // Remove any user info.
  141. parsedUrl.User = nil
  142. return parsedUrl, nil
  143. }
  144. func platformFromHost(u *url.URL) scm.Platform {
  145. switch strings.ToLower(u.Hostname()) {
  146. case "github.com":
  147. return scm.GitHubPlatform
  148. case "gitlab.com":
  149. return scm.GitLabPlatform
  150. default:
  151. return scm.UnknownPlatform
  152. }
  153. }