repo.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. package gitleaks
  2. import (
  3. "crypto/md5"
  4. "fmt"
  5. "os"
  6. "path/filepath"
  7. "strings"
  8. "sync"
  9. "time"
  10. "github.com/google/go-github/github"
  11. log "github.com/sirupsen/logrus"
  12. git "gopkg.in/src-d/go-git.v4"
  13. diffType "gopkg.in/src-d/go-git.v4/plumbing/format/diff"
  14. "gopkg.in/src-d/go-git.v4/plumbing/object"
  15. "gopkg.in/src-d/go-git.v4/plumbing/storer"
  16. "gopkg.in/src-d/go-git.v4/storage/memory"
  17. )
  18. // Leak represents a leaked secret or regex match.
  19. type Leak struct {
  20. Line string `json:"line"`
  21. Commit string `json:"commit"`
  22. Offender string `json:"offender"`
  23. Type string `json:"reason"`
  24. Message string `json:"commitMsg"`
  25. Author string `json:"author"`
  26. File string `json:"file"`
  27. Repo string `json:"repo"`
  28. Date time.Time `json:"date"`
  29. }
  30. // RepoInfo contains a src-d git repository and other data about the repo
  31. type RepoInfo struct {
  32. path string
  33. url string
  34. name string
  35. repository *git.Repository
  36. err error
  37. }
  38. type commitInfo struct {
  39. content string
  40. commit *object.Commit
  41. filePath string
  42. repoName string
  43. githubCommit *github.RepositoryCommit
  44. sha string
  45. message string
  46. author string
  47. date time.Time
  48. }
  49. func newRepoInfo() (*RepoInfo, error) {
  50. for _, re := range config.WhiteList.repos {
  51. if re.FindString(opts.Repo) != "" {
  52. return nil, fmt.Errorf("skipping %s, whitelisted", opts.Repo)
  53. }
  54. }
  55. return &RepoInfo{
  56. path: opts.RepoPath,
  57. url: opts.Repo,
  58. name: filepath.Base(opts.Repo),
  59. }, nil
  60. }
  61. // clone will clone a repo
  62. func (repoInfo *RepoInfo) clone() error {
  63. var (
  64. err error
  65. repo *git.Repository
  66. )
  67. // check if cloning to disk
  68. if opts.Disk {
  69. log.Infof("cloning %s to disk", opts.Repo)
  70. cloneTarget := fmt.Sprintf("%s/%x", dir, md5.Sum([]byte(fmt.Sprintf("%s%s", opts.GithubUser, opts.Repo))))
  71. if strings.HasPrefix(opts.Repo, "git") {
  72. // private
  73. repo, err = git.PlainClone(cloneTarget, false, &git.CloneOptions{
  74. URL: opts.Repo,
  75. Progress: os.Stdout,
  76. Auth: config.sshAuth,
  77. })
  78. } else {
  79. // public
  80. repo, err = git.PlainClone(cloneTarget, false, &git.CloneOptions{
  81. URL: opts.Repo,
  82. Progress: os.Stdout,
  83. })
  84. }
  85. } else if repoInfo.path != "" {
  86. log.Infof("opening %s", opts.RepoPath)
  87. repo, err = git.PlainOpen(repoInfo.path)
  88. } else {
  89. // cloning to memory
  90. log.Infof("cloning %s", opts.Repo)
  91. if strings.HasPrefix(opts.Repo, "git") {
  92. repo, err = git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
  93. URL: opts.Repo,
  94. Progress: os.Stdout,
  95. Auth: config.sshAuth,
  96. })
  97. } else {
  98. repo, err = git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
  99. URL: opts.Repo,
  100. Progress: os.Stdout,
  101. })
  102. }
  103. }
  104. repoInfo.repository = repo
  105. repoInfo.err = err
  106. return err
  107. }
  108. // audit performs an audit
  109. func (repoInfo *RepoInfo) audit() ([]Leak, error) {
  110. var (
  111. err error
  112. leaks []Leak
  113. commitCount int64
  114. commitWg sync.WaitGroup
  115. mutex = &sync.Mutex{}
  116. semaphore chan bool
  117. )
  118. for _, re := range config.WhiteList.repos {
  119. if re.FindString(repoInfo.name) != "" {
  120. return leaks, fmt.Errorf("skipping %s, whitelisted", repoInfo.name)
  121. }
  122. }
  123. // check if target contains an external gitleaks toml
  124. if opts.RepoConfig {
  125. err := config.updateFromRepo(repoInfo)
  126. if err != nil {
  127. return leaks, nil
  128. }
  129. }
  130. // iterate all through commits
  131. cIter, err := repoInfo.repository.Log(&git.LogOptions{
  132. All: true,
  133. })
  134. if err != nil {
  135. return leaks, nil
  136. }
  137. if opts.Threads != 0 {
  138. threads = opts.Threads
  139. }
  140. if opts.RepoPath != "" {
  141. threads = 1
  142. }
  143. semaphore = make(chan bool, threads)
  144. err = cIter.ForEach(func(c *object.Commit) error {
  145. if c == nil || (opts.Depth != 0 && commitCount == opts.Depth) {
  146. return storer.ErrStop
  147. }
  148. if config.WhiteList.commits[c.Hash.String()] {
  149. log.Infof("skipping commit: %s\n", c.Hash.String())
  150. return nil
  151. }
  152. commitCount = commitCount + 1
  153. totalCommits = totalCommits + 1
  154. // commits w/o parent (root of git the git ref) or option for single commit is not empty str
  155. if len(c.ParentHashes) == 0 || opts.Commit == c.Hash.String() {
  156. leaks = append(repoInfo.auditSingleCommit(c, mutex), leaks...)
  157. return nil
  158. }
  159. // regular commit audit
  160. err = c.Parents().ForEach(func(parent *object.Commit) error {
  161. commitWg.Add(1)
  162. semaphore <- true
  163. go func(c *object.Commit, parent *object.Commit) {
  164. var (
  165. filePath string
  166. skipFile bool
  167. )
  168. defer func() {
  169. commitWg.Done()
  170. <-semaphore
  171. if r := recover(); r != nil {
  172. log.Warnf("recovering from panic on commit %s, likely large diff causing panic", c.Hash.String())
  173. }
  174. }()
  175. patch, err := c.Patch(parent)
  176. if err != nil {
  177. log.Warnf("problem generating patch for commit: %s\n", c.Hash.String())
  178. return
  179. }
  180. for _, f := range patch.FilePatches() {
  181. if f.IsBinary() {
  182. continue
  183. }
  184. skipFile = false
  185. from, to := f.Files()
  186. filePath = "???"
  187. if from != nil {
  188. filePath = from.Path()
  189. } else if to != nil {
  190. filePath = to.Path()
  191. }
  192. for _, re := range config.WhiteList.files {
  193. if re.FindString(filePath) != "" {
  194. log.Debugf("skipping whitelisted file (matched regex '%s'): %s", re.String(), filePath)
  195. skipFile = true
  196. break
  197. }
  198. }
  199. if skipFile {
  200. continue
  201. }
  202. chunks := f.Chunks()
  203. for _, chunk := range chunks {
  204. if chunk.Type() == diffType.Add || chunk.Type() == diffType.Delete {
  205. diff := commitInfo{
  206. repoName: repoInfo.name,
  207. filePath: filePath,
  208. content: chunk.Content(),
  209. sha: c.Hash.String(),
  210. author: c.Author.String(),
  211. message: strings.Replace(c.Message, "\n", " ", -1),
  212. date: c.Author.When,
  213. }
  214. chunkLeaks := inspect(diff)
  215. for _, leak := range chunkLeaks {
  216. mutex.Lock()
  217. leaks = append(leaks, leak)
  218. mutex.Unlock()
  219. }
  220. }
  221. }
  222. }
  223. }(c, parent)
  224. return nil
  225. })
  226. return nil
  227. })
  228. commitWg.Wait()
  229. return leaks, nil
  230. // // clear commit cache
  231. // commitMap = make(map[string]bool)
  232. // refs, err := repoInfo.repository.Storer.IterReferences()
  233. // if err != nil {
  234. // return leaks, err
  235. // }
  236. // err = refs.ForEach(func(ref *plumbing.Reference) error {
  237. // if ref.Name().IsTag() {
  238. // return nil
  239. // }
  240. // branchLeaks := repoInfo.auditRef(ref)
  241. // for _, leak := range branchLeaks {
  242. // leaks = append(leaks, leak)
  243. // }
  244. // return nil
  245. // })
  246. }
  247. func (repoInfo *RepoInfo) auditSingleCommit(c *object.Commit, mutex *sync.Mutex) []Leak {
  248. var leaks []Leak
  249. fIter, err := c.Files()
  250. if err != nil {
  251. return nil
  252. }
  253. err = fIter.ForEach(func(f *object.File) error {
  254. bin, err := f.IsBinary()
  255. if bin || err != nil {
  256. return nil
  257. }
  258. for _, re := range config.WhiteList.files {
  259. if re.FindString(f.Name) != "" {
  260. log.Debugf("skipping whitelisted file (matched regex '%s'): %s", re.String(), f.Name)
  261. return nil
  262. }
  263. }
  264. content, err := f.Contents()
  265. if err != nil {
  266. return nil
  267. }
  268. diff := commitInfo{
  269. repoName: repoInfo.name,
  270. filePath: f.Name,
  271. content: content,
  272. sha: c.Hash.String(),
  273. author: c.Author.String(),
  274. message: strings.Replace(c.Message, "\n", " ", -1),
  275. date: c.Author.When,
  276. }
  277. fileLeaks := inspect(diff)
  278. mutex.Lock()
  279. leaks = append(leaks, fileLeaks...)
  280. mutex.Unlock()
  281. return nil
  282. })
  283. return leaks
  284. }