repo.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. package main
  2. import (
  3. "bytes"
  4. "encoding/json"
  5. "fmt"
  6. "io/ioutil"
  7. "log"
  8. "os"
  9. "os/exec"
  10. "path"
  11. "path/filepath"
  12. "sync"
  13. )
  14. // Repo is
  15. type Repo struct {
  16. name string
  17. url string
  18. path string
  19. status string // TODO
  20. leaks []Leak
  21. reportPath string
  22. }
  23. // Leak is
  24. type Leak struct {
  25. Line string `json:"line"`
  26. Commit string `json:"commit"`
  27. Offender string `json:"string"`
  28. Reason string `json:"reason"`
  29. Msg string `json:"commitMsg"`
  30. Time string `json:"time"`
  31. Author string `json:"author"`
  32. File string `json:"file"`
  33. RepoURL string `json:"repoURL"`
  34. }
  35. // Commit is
  36. type Commit struct {
  37. Hash string
  38. Author string
  39. Time string
  40. Msg string
  41. }
  42. // newLocalRepo will such and such
  43. func newLocalRepo(repoPath string) *Repo {
  44. _, name := path.Split(repoPath)
  45. repo := &Repo{
  46. name: name,
  47. path: repoPath,
  48. reportPath: opts.ReportPath,
  49. }
  50. return repo
  51. }
  52. // newRepo
  53. func newRepo(name string, url string, path string) *Repo {
  54. repo := &Repo{
  55. name: name,
  56. url: url,
  57. path: path,
  58. reportPath: opts.ReportPath,
  59. }
  60. return repo
  61. }
  62. // rmTmp
  63. func (repo *Repo) rmTmp() {
  64. log.Printf("removing tmp gitleaks repo %s\n", repo.path)
  65. os.Remove(repo.path)
  66. }
  67. // Audit operates on a single repo and searches the full or partial history of the repo.
  68. // A semaphore is declared for every repo to bind concurrency. If unbounded, the system will throw a
  69. // `too many open files` error. Eventually, gitleaks should use src-d/go-git to avoid shelling out
  70. // commands so that users could opt for doing all clones/diffs in memory.
  71. // Audit also declares two WaitGroups, one for distributing regex/entropy checks, and one for receiving
  72. // the leaks if there are any. This could be done a little more elegantly in the future.
  73. func (repo *Repo) audit() (bool, error) {
  74. var (
  75. out []byte
  76. err error
  77. commitWG sync.WaitGroup
  78. gitLeakReceiverWG sync.WaitGroup
  79. gitLeaksChan = make(chan Leak)
  80. leaks []Leak
  81. semaphoreChan = make(chan struct{}, opts.Concurrency)
  82. leaksPst bool
  83. )
  84. if opts.Tmp {
  85. defer repo.rmTmp()
  86. }
  87. dotGitPath := filepath.Join(repo.path, ".git")
  88. // Navigate to proper location to being audit. Clone repo
  89. // if not present, otherwise fetch for new changes.
  90. if _, err := os.Stat(dotGitPath); os.IsNotExist(err) {
  91. if opts.LocalMode {
  92. return false, fmt.Errorf("%s does not exist", repo.path)
  93. }
  94. // no repo present, clone it
  95. log.Printf("cloning \x1b[37;1m%s\x1b[0m into %s...\n", repo.url, repo.path)
  96. err = exec.Command("git", "clone", repo.url, repo.path).Run()
  97. if err != nil {
  98. return false, fmt.Errorf("cannot clone %s into %s", repo.url, repo.path)
  99. }
  100. } else {
  101. log.Printf("fetching \x1b[37;1m%s\x1b[0m from %s ...\n", repo.name, repo.path)
  102. err = os.Chdir(fmt.Sprintf(repo.path))
  103. if err != nil {
  104. return false, fmt.Errorf("cannot navigate to %s", repo.path)
  105. }
  106. err = exec.Command("git", "fetch").Run()
  107. if err != nil {
  108. return false, fmt.Errorf("cannot fetch %s from %s", repo.url, repo.path)
  109. }
  110. }
  111. err = os.Chdir(fmt.Sprintf(repo.path))
  112. if err != nil {
  113. return false, fmt.Errorf("cannot navigate to %s", repo.path)
  114. }
  115. gitFormat := "--format=%H%n%an%n%s%n%ci"
  116. out, err = exec.Command("git", "rev-list", "--all",
  117. "--remotes", "--topo-order", gitFormat).Output()
  118. if err != nil {
  119. return false, fmt.Errorf("could not retreive rev-list from %s", repo.name)
  120. }
  121. revListLines := bytes.Split(out, []byte("\n"))
  122. commits := parseRevList(revListLines)
  123. for _, commit := range commits {
  124. if commit.Hash == "" {
  125. continue
  126. }
  127. commitWG.Add(1)
  128. go auditDiff(commit, repo, &commitWG, &gitLeakReceiverWG,
  129. semaphoreChan, gitLeaksChan)
  130. if commit.Hash == opts.SinceCommit {
  131. break
  132. }
  133. }
  134. go reportAggregator(&gitLeakReceiverWG, gitLeaksChan, &leaks)
  135. commitWG.Wait()
  136. gitLeakReceiverWG.Wait()
  137. if len(leaks) != 0 {
  138. leaksPst = true
  139. log.Printf("\x1b[31;2mLEAKS DETECTED for %s\x1b[0m!\n", repo.name)
  140. } else {
  141. log.Printf("No Leaks detected for \x1b[32;2m%s\x1b[0m\n", repo.name)
  142. }
  143. if opts.ReportPath != "" && len(leaks) != 0 {
  144. err = repo.writeReport(leaks)
  145. if err != nil {
  146. return leaksPst, fmt.Errorf("could not write report to %s", opts.ReportPath)
  147. }
  148. }
  149. return leaksPst, nil
  150. }
  151. // Used by audit, writeReport will generate a report and write it out to
  152. // --report-path=<path> if specified, otherwise a report will be generated to
  153. // $PWD/<repo_name>_leaks.json. No report will be generated if
  154. // no leaks have been found or --report-out is not set.
  155. func (repo *Repo) writeReport(leaks []Leak) error {
  156. reportJSON, _ := json.MarshalIndent(leaks, "", "\t")
  157. if _, err := os.Stat(opts.ReportPath); os.IsNotExist(err) {
  158. os.MkdirAll(opts.ReportPath, os.ModePerm)
  159. }
  160. reportFileName := fmt.Sprintf("%s_leaks.json", repo.name)
  161. reportFile := filepath.Join(repo.reportPath, reportFileName)
  162. err := ioutil.WriteFile(reportFile, reportJSON, 0644)
  163. if err != nil {
  164. return err
  165. }
  166. log.Printf("report for %s written to %s", repo.name, reportFile)
  167. return nil
  168. }
  169. // parseRevList is responsible for parsing the output of
  170. // $ `git rev-list --all -remotes --topo-order --format=%H%n%an%n%s%n%ci`
  171. // sample output from the above command looks like:
  172. // ...
  173. // SHA
  174. // Author Name
  175. // Commit Msg
  176. // Commit Date
  177. // ...
  178. // Used by audit
  179. func parseRevList(revList [][]byte) []Commit {
  180. var commits []Commit
  181. for i := 0; i < len(revList)-1; i = i + 5 {
  182. commit := Commit{
  183. Hash: string(revList[i+1]),
  184. Author: string(revList[i+2]),
  185. Msg: string(revList[i+3]),
  186. Time: string(revList[i+4]),
  187. }
  188. commits = append(commits, commit)
  189. }
  190. return commits
  191. }
  192. // reportAggregator is a go func responsible for ...
  193. func reportAggregator(gitLeakReceiverWG *sync.WaitGroup, gitLeaks chan Leak, leaks *[]Leak) {
  194. for gitLeak := range gitLeaks {
  195. *leaks = append(*leaks, gitLeak)
  196. if opts.Verbose {
  197. b, err := json.MarshalIndent(gitLeak, "", " ")
  198. if err != nil {
  199. // handle this?
  200. fmt.Printf("failed to output leak: %v", err)
  201. }
  202. fmt.Println(string(b))
  203. }
  204. gitLeakReceiverWG.Done()
  205. }
  206. }
  207. // Used by audit, auditDiff is a go func responsible for diffing and auditing a commit.
  208. // Three channels are input here: 1. a semaphore to bind gitleaks, 2. a leak stream, 3. error handling (TODO)
  209. // This func performs a diff and runs regexes checks on each line of the diff.
  210. func auditDiff(currCommit Commit, repo *Repo, commitWG *sync.WaitGroup,
  211. gitLeakReceiverWG *sync.WaitGroup, semaphoreChan chan struct{},
  212. gitLeaks chan Leak) {
  213. // signal to WG this diff is done being audited
  214. defer commitWG.Done()
  215. if err := os.Chdir(fmt.Sprintf(repo.path)); err != nil {
  216. // TODO handle this better
  217. os.Exit(ExitFailure)
  218. }
  219. commitCmp := fmt.Sprintf("%s^!", currCommit.Hash)
  220. semaphoreChan <- struct{}{}
  221. out, err := exec.Command("git", "diff", commitCmp).Output()
  222. <-semaphoreChan
  223. if err != nil {
  224. os.Exit(ExitFailure)
  225. }
  226. leaks := doChecks(string(out), currCommit, repo)
  227. if len(leaks) == 0 {
  228. return
  229. }
  230. for _, leak := range leaks {
  231. gitLeakReceiverWG.Add(1)
  232. gitLeaks <- leak
  233. }
  234. }