repo.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. package main
  2. import (
  3. "bytes"
  4. "encoding/json"
  5. "fmt"
  6. "go.uber.org/zap"
  7. "io/ioutil"
  8. "os"
  9. "os/exec"
  10. "path"
  11. "path/filepath"
  12. "sync"
  13. )
  14. type Repo struct {
  15. name string
  16. url string
  17. path string
  18. status string // TODO
  19. leaks []Leak
  20. }
  // Leak is a single finding produced while auditing a commit. The struct
  // tags fix the key each field is written under when a leak is encoded as
  // JSON; note that Offender is emitted under the key "string" and Msg under
  // "commitMsg".
  type Leak struct {
  	Line     string `json:"line"`
  	Commit   string `json:"commit"`
  	Offender string `json:"string"`
  	Reason   string `json:"reason"`
  	Msg      string `json:"commitMsg"`
  	Time     string `json:"time"`
  	Author   string `json:"author"`
  	File     string `json:"file"`
  	RepoURL  string `json:"repoURL"`
  }
  // Commit carries the per-commit metadata that parseRevList extracts from
  // `git rev-list` output: the hash, the author name, the commit time and the
  // subject line.
  type Commit struct {
  	Hash, Author, Time, Msg string
  }
  38. // running gitleaks on local repo
  39. func newLocalRepo(repoPath string) *Repo {
  40. _, name := path.Split(repoPath)
  41. repo := &Repo{
  42. name: name,
  43. path: repoPath,
  44. }
  45. return repo
  46. }
  47. func newRepo(name string, url string) *Repo {
  48. repo := &Repo{
  49. name: name,
  50. url: url,
  51. // TODO handle existing one
  52. path: opts.ClonePath + "/" + name,
  53. }
  54. return repo
  55. }
  56. func (repo *Repo) Info(msg string) {
  57. // logger should have these infos: msg, repo, owner, time
  58. logger.Info(msg,
  59. zap.String("repo", repo.name),
  60. zap.String("repo_path", repo.path),
  61. )
  62. }
  63. func (repo *Repo) PrettyPrintF(format string, args ...interface{}) {
  64. if opts.PrettyPrint {
  65. fmt.Fprintf(os.Stderr, format, args...)
  66. }
  67. }
  68. // Audit operates on a single repo and searches the full or partial history of the repo.
  69. // A semaphore is declared for every repo to bind concurrency. If unbounded, the system will throw a
  70. // `too many open files` error. Eventually, gitleaks should use src-d/go-git to avoid shelling out
  71. // commands so that users could opt for doing all clones/diffs in memory.
  72. // Audit also declares two WaitGroups, one for distributing regex/entropy checks, and one for receiving
  73. // the leaks if there are any. This could be done a little more elegantly in the future.
  74. func (repo *Repo) audit(owner *Owner) (bool, error) {
  75. var (
  76. out []byte
  77. err error
  78. commitWG sync.WaitGroup
  79. gitLeakReceiverWG sync.WaitGroup
  80. gitLeaksChan = make(chan Leak)
  81. leaks []Leak
  82. semaphoreChan = make(chan struct{}, opts.Concurrency)
  83. leaksPst bool
  84. )
  85. dotGitPath := filepath.Join(repo.path, ".git")
  86. // Navigate to proper location to being audit. Clone repo
  87. // if not present, otherwise fetch for new changes.
  88. if _, err := os.Stat(dotGitPath); os.IsNotExist(err) {
  89. if opts.LocalMode {
  90. return false, fmt.Errorf("%s does not exist", repo.path)
  91. }
  92. // no repo present, clone it
  93. repo.Info("cloning")
  94. repo.PrettyPrintF("Cloning \x1b[37;1m%s\x1b[0m...\n", repo.url)
  95. err = exec.Command("git", "clone", repo.url, repo.path).Run()
  96. if err != nil {
  97. return false, fmt.Errorf("cannot clone %s into %s", repo.url, repo.path)
  98. }
  99. } else {
  100. repo.Info("fetching")
  101. repo.PrettyPrintF("Fetching \x1b[37;1m%s\x1b[0m...\n", repo.url)
  102. err = exec.Command("git", "fetch").Run()
  103. if err != nil {
  104. return false, fmt.Errorf("cannot fetch %s from %s", repo.url, repo.path)
  105. }
  106. }
  107. err = os.Chdir(fmt.Sprintf(repo.path))
  108. if err != nil {
  109. return false, fmt.Errorf("cannot navigate to %s", repo.path)
  110. }
  111. gitFormat := "--format=%H%n%an%n%s%n%ci"
  112. out, err = exec.Command("git", "rev-list", "--all",
  113. "--remotes", "--topo-order", gitFormat).Output()
  114. if err != nil {
  115. return false, fmt.Errorf("could not retreive rev-list from %s", repo.name)
  116. }
  117. revListLines := bytes.Split(out, []byte("\n"))
  118. commits := parseRevList(revListLines)
  119. for _, commit := range commits {
  120. if commit.Hash == "" {
  121. continue
  122. }
  123. commitWG.Add(1)
  124. go auditDiff(commit, repo, &commitWG, &gitLeakReceiverWG,
  125. semaphoreChan, gitLeaksChan)
  126. if commit.Hash == opts.SinceCommit {
  127. break
  128. }
  129. }
  130. go reportAggregator(&gitLeakReceiverWG, gitLeaksChan, &leaks)
  131. commitWG.Wait()
  132. gitLeakReceiverWG.Wait()
  133. if len(leaks) != 0 {
  134. leaksPst = true
  135. }
  136. if opts.ReportPath != "" && len(leaks) != 0 {
  137. err = repo.writeReport()
  138. if err != nil {
  139. return leaksPst, fmt.Errorf("could not write report to %s", opts.ReportPath)
  140. }
  141. }
  142. return leaksPst, nil
  143. }
  144. // Used by audit, writeReport will generate a report and write it out to
  145. // $GITLEAKS_HOME/report/<owner>/<repo>. No report will be generated if
  146. // no leaks have been found
  147. func (repo *Repo) writeReport() error {
  148. reportJSON, _ := json.MarshalIndent(repo.leaks, "", "\t")
  149. if _, err := os.Stat(opts.ReportPath); os.IsNotExist(err) {
  150. os.Mkdir(opts.ReportPath, os.ModePerm)
  151. }
  152. reportFileName := fmt.Sprintf("%s_leaks.json", repo.name)
  153. reportFile := filepath.Join(opts.ReportPath, reportFileName)
  154. err := ioutil.WriteFile(reportFile, reportJSON, 0644)
  155. if err != nil {
  156. return err
  157. }
  158. repo.Info(fmt.Sprintf("Report written to %s\n", reportFile))
  159. return nil
  160. }
  161. // parseRevList is responsible for parsing the output of
  162. // $ `git rev-list --all -remotes --topo-order --format=%H%n%an%n%s%n%ci`
  163. // sample output from the above command looks like:
  164. // ...
  165. // SHA
  166. // Author Name
  167. // Commit Msg
  168. // Commit Date
  169. // ...
  170. // Used by audit
  171. func parseRevList(revList [][]byte) []Commit {
  172. var commits []Commit
  173. for i := 0; i < len(revList)-1; i = i + 5 {
  174. commit := Commit{
  175. Hash: string(revList[i+1]),
  176. Author: string(revList[i+2]),
  177. Msg: string(revList[i+3]),
  178. Time: string(revList[i+4]),
  179. }
  180. commits = append(commits, commit)
  181. }
  182. return commits
  183. }
  184. // reportAggregator is a go func responsible for ...
  185. func reportAggregator(gitLeakReceiverWG *sync.WaitGroup, gitLeaks chan Leak, leaks *[]Leak) {
  186. for gitLeak := range gitLeaks {
  187. logger.Info("leak",
  188. zap.String("line", gitLeak.Line),
  189. zap.String("commit", gitLeak.Commit),
  190. zap.String("offender", gitLeak.Offender),
  191. zap.String("Reason", gitLeak.Reason),
  192. zap.String("author", gitLeak.Author),
  193. zap.String("file", gitLeak.File),
  194. zap.String("repoURL", gitLeak.RepoURL),
  195. zap.String("timeOfCommit", gitLeak.Time),
  196. )
  197. *leaks = append(*leaks, gitLeak)
  198. if opts.PrettyPrint {
  199. b, err := json.MarshalIndent(gitLeak, "", " ")
  200. if err != nil {
  201. // handle this?
  202. fmt.Println("failed to output leak:", err)
  203. }
  204. fmt.Println(string(b))
  205. }
  206. gitLeakReceiverWG.Done()
  207. }
  208. }
  209. // Used by audit, auditDiff is a go func responsible for diffing and auditing a commit.
  210. // Three channels are input here: 1. a semaphore to bind gitleaks, 2. a leak stream, 3. error handling (TODO)
  211. // This func performs a diff and runs regexes checks on each line of the diff.
  212. func auditDiff(currCommit Commit, repo *Repo, commitWG *sync.WaitGroup,
  213. gitLeakReceiverWG *sync.WaitGroup, semaphoreChan chan struct{},
  214. gitLeaks chan Leak) {
  215. // signal to WG this diff is done being audited
  216. defer commitWG.Done()
  217. if err := os.Chdir(fmt.Sprintf(repo.path)); err != nil {
  218. // TODO handle this better
  219. os.Exit(EXIT_FAILURE)
  220. }
  221. commitCmp := fmt.Sprintf("%s^!", currCommit.Hash)
  222. semaphoreChan <- struct{}{}
  223. out, err := exec.Command("git", "diff", commitCmp).Output()
  224. <-semaphoreChan
  225. if err != nil {
  226. os.Exit(EXIT_FAILURE)
  227. }
  228. leaks := doChecks(string(out), currCommit, repo)
  229. if len(leaks) == 0 {
  230. return
  231. }
  232. for _, leak := range leaks {
  233. gitLeakReceiverWG.Add(1)
  234. gitLeaks <- leak
  235. }
  236. }