git.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. package detect
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "net/url"
  7. "os/exec"
  8. "regexp"
  9. "strings"
  10. "time"
  11. "github.com/gitleaks/go-gitdiff/gitdiff"
  12. "github.com/zricethezav/gitleaks/v8/cmd/scm"
  13. "github.com/zricethezav/gitleaks/v8/logging"
  14. "github.com/zricethezav/gitleaks/v8/report"
  15. "github.com/zricethezav/gitleaks/v8/sources"
  16. )
  17. func (d *Detector) DetectGit(cmd *sources.GitCmd, remote *RemoteInfo) ([]report.Finding, error) {
  18. defer cmd.Wait()
  19. var (
  20. diffFilesCh = cmd.DiffFilesCh()
  21. errCh = cmd.ErrCh()
  22. )
  23. // loop to range over both DiffFiles (stdout) and ErrCh (stderr)
  24. for diffFilesCh != nil || errCh != nil {
  25. select {
  26. case gitdiffFile, open := <-diffFilesCh:
  27. if !open {
  28. diffFilesCh = nil
  29. break
  30. }
  31. // skip binary files
  32. if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
  33. continue
  34. }
  35. // Check if commit is allowed
  36. commitSHA := ""
  37. if gitdiffFile.PatchHeader != nil {
  38. commitSHA = gitdiffFile.PatchHeader.SHA
  39. for _, a := range d.Config.Allowlists {
  40. if ok, c := a.CommitAllowed(gitdiffFile.PatchHeader.SHA); ok {
  41. logging.Trace().Str("allowed-commit", c).Msg("skipping commit: global allowlist")
  42. continue
  43. }
  44. }
  45. }
  46. d.addCommit(commitSHA)
  47. d.Sema.Go(func() error {
  48. for _, textFragment := range gitdiffFile.TextFragments {
  49. if textFragment == nil {
  50. return nil
  51. }
  52. fragment := Fragment{
  53. Raw: textFragment.Raw(gitdiff.OpAdd),
  54. CommitSHA: commitSHA,
  55. FilePath: gitdiffFile.NewName,
  56. }
  57. timer := time.AfterFunc(SlowWarningThreshold, func() {
  58. logging.Debug().
  59. Str("commit", commitSHA[:7]).
  60. Str("path", fragment.FilePath).
  61. Msgf("Taking longer than %s to inspect fragment", SlowWarningThreshold.String())
  62. })
  63. for _, finding := range d.Detect(fragment) {
  64. d.AddFinding(augmentGitFinding(remote, finding, textFragment, gitdiffFile))
  65. }
  66. if timer != nil {
  67. timer.Stop()
  68. timer = nil
  69. }
  70. }
  71. return nil
  72. })
  73. case err, open := <-errCh:
  74. if !open {
  75. errCh = nil
  76. break
  77. }
  78. return d.findings, err
  79. }
  80. }
  81. if err := d.Sema.Wait(); err != nil {
  82. return d.findings, err
  83. }
  84. logging.Info().Msgf("%d commits scanned.", len(d.commitMap))
  85. logging.Debug().Msg("Note: this number might be smaller than expected due to commits with no additions")
  86. return d.findings, nil
  87. }
  88. type RemoteInfo struct {
  89. Platform scm.Platform
  90. Url string
  91. }
  92. func NewRemoteInfo(platform scm.Platform, source string) *RemoteInfo {
  93. if platform == scm.NoPlatform {
  94. return &RemoteInfo{Platform: platform}
  95. }
  96. remoteUrl, err := getRemoteUrl(source)
  97. if err != nil {
  98. if strings.Contains(err.Error(), "No remote configured") {
  99. logging.Debug().Msg("skipping finding links: repository has no configured remote.")
  100. platform = scm.NoPlatform
  101. } else {
  102. logging.Error().Err(err).Msg("skipping finding links: unable to parse remote URL")
  103. }
  104. goto End
  105. }
  106. if platform == scm.UnknownPlatform {
  107. platform = platformFromHost(remoteUrl)
  108. if platform == scm.UnknownPlatform {
  109. logging.Info().
  110. Str("host", remoteUrl.Hostname()).
  111. Msg("Unknown SCM platform. Use --platform to include links in findings.")
  112. } else {
  113. logging.Debug().
  114. Str("host", remoteUrl.Hostname()).
  115. Str("platform", platform.String()).
  116. Msg("SCM platform parsed from host")
  117. }
  118. }
  119. End:
  120. var rUrl string
  121. if remoteUrl != nil {
  122. rUrl = remoteUrl.String()
  123. }
  124. return &RemoteInfo{
  125. Platform: platform,
  126. Url: rUrl,
  127. }
  128. }
  129. var sshUrlpat = regexp.MustCompile(`^git@([a-zA-Z0-9.-]+):([\w/.-]+?)(?:\.git)?$`)
  130. func getRemoteUrl(source string) (*url.URL, error) {
  131. // This will return the first remote — typically, "origin".
  132. cmd := exec.Command("git", "ls-remote", "--quiet", "--get-url")
  133. if source != "." {
  134. cmd.Dir = source
  135. }
  136. stdout, err := cmd.Output()
  137. if err != nil {
  138. var exitError *exec.ExitError
  139. if errors.As(err, &exitError) {
  140. return nil, fmt.Errorf("command failed (%d): %w, stderr: %s", exitError.ExitCode(), err, string(bytes.TrimSpace(exitError.Stderr)))
  141. }
  142. return nil, err
  143. }
  144. remoteUrl := string(bytes.TrimSpace(stdout))
  145. if matches := sshUrlpat.FindStringSubmatch(remoteUrl); matches != nil {
  146. remoteUrl = fmt.Sprintf("https://%s/%s", matches[1], matches[2])
  147. }
  148. remoteUrl = strings.TrimSuffix(remoteUrl, ".git")
  149. parsedUrl, err := url.Parse(remoteUrl)
  150. if err != nil {
  151. return nil, fmt.Errorf("unable to parse remote URL: %w", err)
  152. }
  153. // Remove any user info.
  154. parsedUrl.User = nil
  155. return parsedUrl, nil
  156. }
  157. func platformFromHost(u *url.URL) scm.Platform {
  158. switch strings.ToLower(u.Hostname()) {
  159. case "github.com":
  160. return scm.GitHubPlatform
  161. case "gitlab.com":
  162. return scm.GitLabPlatform
  163. case "dev.azure.com", "visualstudio.com":
  164. return scm.AzureDevOpsPlatform
  165. default:
  166. return scm.UnknownPlatform
  167. }
  168. }