git.go 6.6 KB


  1. package sources
  2. import (
  3. "bufio"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "os"
  8. "os/exec"
  9. "path/filepath"
  10. "regexp"
  11. "strings"
  12. "github.com/gitleaks/go-gitdiff/gitdiff"
  13. "github.com/zricethezav/gitleaks/v8/logging"
  14. )
// quotedOptPattern matches a string that is, in its entirety, wrapped in a
// pair of double quotes or a pair of single quotes with at least one
// character in between (e.g. `"--all"` or `'--since=2020'`).
var quotedOptPattern = regexp.MustCompile(`^(?:"[^"]+"|'[^']+')$`)
// GitCmd helps to work with Git's output.
//
// It bundles a started git process with the channels that carry its parsed
// stdout and its stderr-derived error.
type GitCmd struct {
	// repoPath is the cleaned repository path handed to `git -C`.
	repoPath string
	// cmd is the underlying git process.
	cmd *exec.Cmd
	// diffFilesCh carries the files returned by gitdiff.Parse on the
	// process's stdout.
	diffFilesCh <-chan *gitdiff.File
	// errCh is written to (at most once) and closed by listenForStdErr.
	errCh <-chan error
}
// GitInfo groups string-valued git metadata.
//
// NOTE(review): nothing in this part of the file reads or writes GitInfo;
// the field meanings are inferred from their names only and should be
// confirmed against the callers.
type GitInfo struct {
	Source  string
	Commit  string
	Link    string
	Author  string
	Email   string
	Date    string
	Message string
}
  32. // NewGitLogCmd returns `*DiffFilesCmd` with two channels: `<-chan *gitdiff.File` and `<-chan error`.
  33. // Caller should read everything from channels until receiving a signal about their closure and call
  34. // the `func (*DiffFilesCmd) Wait()` error in order to release resources.
  35. func NewGitLogCmd(source string, logOpts string) (*GitCmd, error) {
  36. sourceClean := filepath.Clean(source)
  37. var cmd *exec.Cmd
  38. if logOpts != "" {
  39. args := []string{"-C", sourceClean, "log", "-p", "-U0"}
  40. // Ensure that the user-provided |logOpts| aren't wrapped in quotes.
  41. // https://github.com/gitleaks/gitleaks/issues/1153
  42. userArgs := strings.Split(logOpts, " ")
  43. var quotedOpts []string
  44. for _, element := range userArgs {
  45. if quotedOptPattern.MatchString(element) {
  46. quotedOpts = append(quotedOpts, element)
  47. }
  48. }
  49. if len(quotedOpts) > 0 {
  50. logging.Warn().Msgf("the following `--log-opts` values may not work as expected: %v\n\tsee https://github.com/gitleaks/gitleaks/issues/1153 for more information", quotedOpts)
  51. }
  52. args = append(args, userArgs...)
  53. cmd = exec.Command("git", args...)
  54. } else {
  55. cmd = exec.Command("git", "-C", sourceClean, "log", "-p", "-U0",
  56. "--full-history", "--all")
  57. }
  58. logging.Debug().Msgf("executing: %s", cmd.String())
  59. stdout, err := cmd.StdoutPipe()
  60. if err != nil {
  61. return nil, err
  62. }
  63. stderr, err := cmd.StderrPipe()
  64. if err != nil {
  65. return nil, err
  66. }
  67. if err := cmd.Start(); err != nil {
  68. return nil, err
  69. }
  70. errCh := make(chan error)
  71. go listenForStdErr(stderr, errCh)
  72. gitdiffFiles, err := gitdiff.Parse(stdout)
  73. if err != nil {
  74. return nil, err
  75. }
  76. return &GitCmd{
  77. repoPath: sourceClean,
  78. cmd: cmd,
  79. diffFilesCh: gitdiffFiles,
  80. errCh: errCh,
  81. }, nil
  82. }
  83. // NewGitDiffCmd returns `*DiffFilesCmd` with two channels: `<-chan *gitdiff.File` and `<-chan error`.
  84. // Caller should read everything from channels until receiving a signal about their closure and call
  85. // the `func (*DiffFilesCmd) Wait()` error in order to release resources.
  86. func NewGitDiffCmd(source string, staged bool) (*GitCmd, error) {
  87. sourceClean := filepath.Clean(source)
  88. var cmd *exec.Cmd
  89. cmd = exec.Command("git", "-C", sourceClean, "diff", "-U0", "--no-ext-diff", ".")
  90. if staged {
  91. cmd = exec.Command("git", "-C", sourceClean, "diff", "-U0", "--no-ext-diff",
  92. "--staged", ".")
  93. }
  94. logging.Debug().Msgf("executing: %s", cmd.String())
  95. stdout, err := cmd.StdoutPipe()
  96. if err != nil {
  97. return nil, err
  98. }
  99. stderr, err := cmd.StderrPipe()
  100. if err != nil {
  101. return nil, err
  102. }
  103. if err := cmd.Start(); err != nil {
  104. return nil, err
  105. }
  106. errCh := make(chan error)
  107. go listenForStdErr(stderr, errCh)
  108. gitdiffFiles, err := gitdiff.Parse(stdout)
  109. if err != nil {
  110. return nil, err
  111. }
  112. return &GitCmd{
  113. repoPath: sourceClean,
  114. cmd: cmd,
  115. diffFilesCh: gitdiffFiles,
  116. errCh: errCh,
  117. }, nil
  118. }
  119. // CheckoutBlob writes the contents of the blob at commit:filepath into a temp file
  120. // and returns its path.
  121. func (g *GitCmd) CheckoutBlob(commit, filepathInRepo string) (string, error) {
  122. // Create a temp file with the same extension as the blob, if possible
  123. ext := filepath.Ext(filepathInRepo)
  124. // tmpDir, err := os.MkdirTemp("gitleaks", "archive-*")
  125. tmpFile, err := os.CreateTemp("", "gitleaks-blob-*"+ext)
  126. if err != nil {
  127. return "", fmt.Errorf("creating temp file for blob: %w", err)
  128. }
  129. tmpFilePath := tmpFile.Name()
  130. tmpFile.Close()
  131. // git show <commit>:<path>
  132. gitArgs := []string{"-C", g.repoPath, "show", fmt.Sprintf("%s:%s", commit, filepathInRepo)}
  133. cmd := exec.Command("git", gitArgs...)
  134. cmd.Stdout, err = os.OpenFile(tmpFilePath, os.O_WRONLY, 0o644)
  135. if err != nil {
  136. os.Remove(tmpFilePath)
  137. return "", fmt.Errorf("opening temp file for write: %w", err)
  138. }
  139. if err := cmd.Run(); err != nil {
  140. os.Remove(tmpFilePath)
  141. return "", fmt.Errorf("git show failed: %w", err)
  142. }
  143. return tmpFilePath, nil
  144. }
  145. // DiffFilesCh returns a channel with *gitdiff.File.
  146. func (c *GitCmd) DiffFilesCh() <-chan *gitdiff.File {
  147. return c.diffFilesCh
  148. }
  149. // ErrCh returns a channel that could produce an error if there is something in stderr.
  150. func (c *GitCmd) ErrCh() <-chan error {
  151. return c.errCh
  152. }
  153. // Wait waits for the command to exit and waits for any copying to
  154. // stdin or copying from stdout or stderr to complete.
  155. //
  156. // Wait also closes underlying stdout and stderr.
  157. func (c *GitCmd) Wait() (err error) {
  158. return c.cmd.Wait()
  159. }
  160. // listenForStdErr listens for stderr output from git, prints it to stdout,
  161. // sends to errCh and closes it.
  162. func listenForStdErr(stderr io.ReadCloser, errCh chan<- error) {
  163. defer close(errCh)
  164. var errEncountered bool
  165. scanner := bufio.NewScanner(stderr)
  166. for scanner.Scan() {
  167. // if git throws one of the following errors:
  168. //
  169. // exhaustive rename detection was skipped due to too many files.
  170. // you may want to set your diff.renameLimit variable to at least
  171. // (some large number) and retry the command.
  172. //
  173. // inexact rename detection was skipped due to too many files.
  174. // you may want to set your diff.renameLimit variable to at least
  175. // (some large number) and retry the command.
  176. //
  177. // Auto packing the repository in background for optimum performance.
  178. // See "git help gc" for manual housekeeping.
  179. //
  180. // we skip exiting the program as git log -p/git diff will continue
  181. // to send data to stdout and finish executing. This next bit of
  182. // code prevents gitleaks from stopping mid scan if this error is
  183. // encountered
  184. if strings.Contains(scanner.Text(),
  185. "exhaustive rename detection was skipped") ||
  186. strings.Contains(scanner.Text(),
  187. "inexact rename detection was skipped") ||
  188. strings.Contains(scanner.Text(),
  189. "you may want to set your diff.renameLimit") ||
  190. strings.Contains(scanner.Text(),
  191. "See \"git help gc\" for manual housekeeping") ||
  192. strings.Contains(scanner.Text(),
  193. "Auto packing the repository in background for optimum performance") {
  194. logging.Warn().Msg(scanner.Text())
  195. } else {
  196. logging.Error().Msgf("[git] %s", scanner.Text())
  197. errEncountered = true
  198. }
  199. }
  200. if errEncountered {
  201. errCh <- errors.New("stderr is not empty")
  202. return
  203. }
  204. }