repo.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. package audit
  2. import (
  3. "bytes"
  4. "crypto/md5"
  5. "fmt"
  6. "github.com/BurntSushi/toml"
  7. "github.com/sergi/go-diff/diffmatchpatch"
  8. log "github.com/sirupsen/logrus"
  9. "github.com/zricethezav/gitleaks/config"
  10. "github.com/zricethezav/gitleaks/manager"
  11. "gopkg.in/src-d/go-billy.v4"
  12. "gopkg.in/src-d/go-git.v4"
  13. "gopkg.in/src-d/go-git.v4/plumbing"
  14. "gopkg.in/src-d/go-git.v4/plumbing/object"
  15. "gopkg.in/src-d/go-git.v4/plumbing/storer"
  16. "gopkg.in/src-d/go-git.v4/storage/memory"
  17. "io"
  18. "os"
  19. "path"
  20. "sync"
  21. "time"
  22. )
  23. // Repo wraps a *git.Repository object in addition to a manager object and the name of the repo.
  24. // Commits are inspected from the *git.Repository object. If a commit is found then we send it
  25. // via the manager LeakChan where the manager receives and keeps track of all leaks.
  26. type Repo struct {
  27. *git.Repository
  28. // config is used when the --repo-config option is set.
  29. // This allows users to load up configs specific to their repos.
  30. // Imagine the scenario where you are doing an audit of a large organization
  31. // and you want certain repos to look for specific rules. If those specific repos
  32. // have a gitleaks.toml or .gitleaks.toml config then those configs will be used specifically
  33. // for those repo audits.
  34. config config.Config
  35. Name string
  36. Manager *manager.Manager
  37. }
  38. // NewRepo initializes and returns a Repo struct.
  39. func NewRepo(m *manager.Manager) *Repo {
  40. return &Repo{
  41. Manager: m,
  42. config: m.Config,
  43. }
  44. }
  45. // Clone will clone a repo and return a Repo struct which contains a go-git repo. The clone method
  46. // is determined by the clone options set in Manager.metadata.cloneOptions
  47. func (repo *Repo) Clone(cloneOption *git.CloneOptions) error {
  48. var (
  49. repository *git.Repository
  50. err error
  51. )
  52. if cloneOption == nil {
  53. cloneOption = repo.Manager.CloneOptions
  54. }
  55. log.Infof("cloning... %s", cloneOption.URL)
  56. start := time.Now()
  57. if repo.Manager.CloneDir != "" {
  58. clonePath := fmt.Sprintf("%s/%x", repo.Manager.CloneDir, md5.Sum([]byte(time.Now().String())))
  59. repository, err = git.PlainClone(clonePath, false, cloneOption)
  60. } else {
  61. repository, err = git.Clone(memory.NewStorage(), nil, cloneOption)
  62. }
  63. if err != nil {
  64. return err
  65. }
  66. repo.Repository = repository
  67. repo.Manager.RecordTime(manager.CloneTime(howLong(start)))
  68. return nil
  69. }
  70. // AuditUncommitted will do a `git diff` and scan changed files that are being tracked. This is useful functionality
  71. // for a pre-commit hook so you can make sure your code does not have any leaks before committing.
  72. func (repo *Repo) AuditUncommitted() error {
  73. auditTimeStart := time.Now()
  74. r, err := repo.Head()
  75. if err != nil {
  76. return err
  77. }
  78. c, err := repo.CommitObject(r.Hash())
  79. if err != nil {
  80. return err
  81. }
  82. prevTree, err := c.Tree()
  83. if err != nil {
  84. return err
  85. }
  86. wt, err := repo.Worktree()
  87. if err != nil {
  88. return err
  89. }
  90. status, err := wt.Status()
  91. for fn, state := range status {
  92. var (
  93. prevFileContents string
  94. currFileContents string
  95. filename string
  96. )
  97. if state.Staging != git.Untracked {
  98. if state.Staging == git.Deleted {
  99. // file in staging has been deleted, aka it is not on the filesystem
  100. // so the contents of the file are ""
  101. currFileContents = ""
  102. } else {
  103. workTreeBuf := bytes.NewBuffer(nil)
  104. workTreeFile, err := wt.Filesystem.Open(fn)
  105. if err != nil {
  106. continue
  107. }
  108. if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
  109. return err
  110. }
  111. currFileContents = workTreeBuf.String()
  112. filename = workTreeFile.Name()
  113. }
  114. // get files at HEAD state
  115. prevFile, err := prevTree.File(fn)
  116. if err != nil {
  117. prevFileContents = ""
  118. } else {
  119. prevFileContents, err = prevFile.Contents()
  120. if err != nil {
  121. return err
  122. }
  123. if filename == "" {
  124. filename = prevFile.Name
  125. }
  126. }
  127. dmp := diffmatchpatch.New()
  128. diffs := dmp.DiffMain(prevFileContents, currFileContents, false)
  129. var diffContents string
  130. for _, d := range diffs {
  131. switch d.Type {
  132. case diffmatchpatch.DiffInsert:
  133. diffContents += fmt.Sprintf("%s\n", d.Text)
  134. case diffmatchpatch.DiffDelete:
  135. diffContents += fmt.Sprintf("%s\n", d.Text)
  136. }
  137. }
  138. InspectString(diffContents, c, repo, filename)
  139. }
  140. }
  141. if err != nil {
  142. return err
  143. }
  144. repo.Manager.RecordTime(manager.AuditTime(howLong(auditTimeStart)))
  145. return nil
  146. }
  147. // Audit is responsible for scanning the entire history (default behavior) of a
  148. // git repo. Options that can change the behavior of this function include: --commit, --depth, --branch.
  149. // See options/options.go for an explanation on these options.
  150. func (repo *Repo) Audit() error {
  151. if repo.Repository == nil {
  152. return fmt.Errorf("%s repo is empty", repo.Name)
  153. }
  154. // load up alternative config if possible, if not use manager's config
  155. if repo.Manager.Opts.RepoConfig {
  156. cfg, err := repo.loadRepoConfig()
  157. if err != nil {
  158. return err
  159. }
  160. repo.config = cfg
  161. }
  162. auditTimeStart := time.Now()
  163. // audit single Commit
  164. if repo.Manager.Opts.Commit != "" {
  165. h := plumbing.NewHash(repo.Manager.Opts.Commit)
  166. c, err := repo.CommitObject(h)
  167. if err != nil {
  168. return err
  169. }
  170. err = inspectCommit(c, repo)
  171. if err != nil {
  172. return err
  173. }
  174. return nil
  175. }
  176. logOpts, err := getLogOptions(repo)
  177. if err != nil {
  178. return err
  179. }
  180. cIter, err := repo.Log(logOpts)
  181. if err != nil {
  182. return err
  183. }
  184. //checker := make(map[string]bool)
  185. cc := 0
  186. semaphore := make(chan bool, howManyThreads(repo.Manager.Opts.Threads))
  187. wg := sync.WaitGroup{}
  188. err = cIter.ForEach(func(c *object.Commit) error {
  189. if c == nil {
  190. return storer.ErrStop
  191. }
  192. if len(c.ParentHashes) == 0 {
  193. cc++
  194. err = inspectCommit(c, repo)
  195. if err != nil {
  196. return err
  197. }
  198. return nil
  199. }
  200. if isCommitWhiteListed(c.Hash.String(), repo.config.Whitelist.Commits) {
  201. return nil
  202. }
  203. cc++
  204. err = c.Parents().ForEach(func(parent *object.Commit) error {
  205. start := time.Now()
  206. patch, err := c.Patch(parent)
  207. if err != nil {
  208. return fmt.Errorf("could not generate patch")
  209. }
  210. repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
  211. wg.Add(1)
  212. semaphore <- true
  213. go func(c *object.Commit, patch *object.Patch) {
  214. defer func() {
  215. <-semaphore
  216. wg.Done()
  217. }()
  218. inspectPatch(patch, c, repo)
  219. }(c, patch)
  220. return nil
  221. })
  222. return nil
  223. })
  224. wg.Wait()
  225. repo.Manager.RecordTime(manager.AuditTime(howLong(auditTimeStart)))
  226. repo.Manager.IncrementCommits(cc)
  227. return nil
  228. }
  229. // Open opens a local repo either from repo-path or $PWD
  230. func (repo *Repo) Open() error {
  231. if repo.Manager.Opts.RepoPath != "" {
  232. // open git repo from repo path
  233. repository, err := git.PlainOpen(repo.Manager.Opts.RepoPath)
  234. if err != nil {
  235. return err
  236. }
  237. repo.Repository = repository
  238. } else {
  239. // open git repo from PWD
  240. dir, err := os.Getwd()
  241. if err != nil {
  242. return err
  243. }
  244. repository, err := git.PlainOpen(dir)
  245. if err != nil {
  246. return err
  247. }
  248. repo.Repository = repository
  249. repo.Name = path.Base(dir)
  250. }
  251. return nil
  252. }
  253. func (repo *Repo) loadRepoConfig() (config.Config, error) {
  254. wt, err := repo.Repository.Worktree()
  255. if err != nil {
  256. return config.Config{}, err
  257. }
  258. var f billy.File
  259. f, _ = wt.Filesystem.Open(".gitleaks.toml")
  260. if f == nil {
  261. f, err = wt.Filesystem.Open("gitleaks.toml")
  262. if err != nil {
  263. return config.Config{}, fmt.Errorf("problem loading repo config: %v", err)
  264. }
  265. }
  266. defer f.Close()
  267. var tomlLoader config.TomlLoader
  268. _, err = toml.DecodeReader(f, &tomlLoader)
  269. return tomlLoader.Parse()
  270. }