repo.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. package audit
  2. import (
  3. "bytes"
  4. "crypto/md5"
  5. "fmt"
  6. "github.com/BurntSushi/toml"
  7. "github.com/sergi/go-diff/diffmatchpatch"
  8. log "github.com/sirupsen/logrus"
  9. "github.com/zricethezav/gitleaks/config"
  10. "github.com/zricethezav/gitleaks/manager"
  11. "gopkg.in/src-d/go-billy.v4"
  12. "gopkg.in/src-d/go-git.v4"
  13. "gopkg.in/src-d/go-git.v4/plumbing"
  14. "gopkg.in/src-d/go-git.v4/plumbing/object"
  15. "gopkg.in/src-d/go-git.v4/plumbing/storer"
  16. "gopkg.in/src-d/go-git.v4/storage/memory"
  17. "io"
  18. "os"
  19. "sync"
  20. "time"
  21. )
// Repo wraps a *git.Repository object in addition to a manager object and the name of the repo.
// Commits are inspected from the *git.Repository object. If a leak is found then we send it
// via the manager LeakChan where the manager receives and keeps track of all leaks.
type Repo struct {
	// Repository is embedded so that go-git methods such as Head, Log,
	// CommitObject and Worktree can be called directly on a Repo.
	*git.Repository

	// AlternativeConfig is used when the --repo-config option is set.
	// This allows users to load up configs specific to their repos.
	// Imagine the scenario where you are doing an audit of a large organization
	// and you want certain repos to look for specific rules. If those specific repos
	// have a gitleaks.toml or .gitleaks.toml config then those configs will be used specifically
	// for those repo audits.
	//
	// NOTE(review): nothing in this file assigns AlternativeConfig; Audit stores
	// the repo-specific config in the unexported config field instead. Confirm
	// whether this field is still read elsewhere.
	AlternativeConfig config.Config

	// config is the configuration consulted during an audit. NewRepo seeds it
	// from the manager's Config, and Audit replaces it with the repo's own
	// config when Opts.RepoConfig is set.
	config config.Config

	// Name identifies the repo; Audit includes it in its "repo is empty" error.
	Name string

	// Manager supplies the options and clone settings used by this repo and
	// receives the timings and commit counts recorded while auditing.
	Manager *manager.Manager
}
  38. // NewRepo initializes and returns a Repo struct.
  39. func NewRepo(m *manager.Manager) *Repo {
  40. return &Repo{
  41. Manager: m,
  42. config: m.Config,
  43. }
  44. }
  45. // Clone will clone a repo and return a Repo struct which contains a go-git repo. The clone method
  46. // is determined by the clone options set in Manager.metadata.cloneOptions
  47. func (repo *Repo) Clone(cloneOptions ...*git.CloneOptions) error {
  48. var (
  49. repository *git.Repository
  50. err error
  51. cloneOption *git.CloneOptions
  52. )
  53. if len(cloneOptions) != 0 {
  54. cloneOption = cloneOptions[0]
  55. } else {
  56. cloneOption = repo.Manager.CloneOptions
  57. }
  58. log.Infof("cloning... %s", cloneOption.URL)
  59. start := time.Now()
  60. if repo.Manager.CloneDir != "" {
  61. clonePath := fmt.Sprintf("%s/%x", repo.Manager.CloneDir, md5.Sum([]byte(time.Now().String())))
  62. repository, err = git.PlainClone(clonePath, false, cloneOption)
  63. } else {
  64. repository, err = git.Clone(memory.NewStorage(), nil, cloneOption)
  65. }
  66. if err != nil {
  67. return err
  68. }
  69. repo.Repository = repository
  70. repo.Manager.RecordTime(manager.CloneTime(howLong(start)))
  71. return nil
  72. }
  73. // AuditLocal will do a `git diff` and scan changed files that are being tracked. This is useful functionality
  74. // for a pre-commit hook so you can make sure your code does not have any leaks before committing.
  75. func (repo *Repo) AuditLocal() error {
  76. auditTimeStart := time.Now()
  77. r, err := repo.Head()
  78. if err != nil {
  79. return err
  80. }
  81. c, err := repo.CommitObject(r.Hash())
  82. if err != nil {
  83. return err
  84. }
  85. prevTree, err := c.Tree()
  86. if err != nil {
  87. return err
  88. }
  89. wt, err := repo.Worktree()
  90. if err != nil {
  91. return err
  92. }
  93. status, err := wt.Status()
  94. for fn, state := range status {
  95. var (
  96. prevFileContents string
  97. currFileContents string
  98. filename string
  99. )
  100. if state.Staging != git.Untracked {
  101. if state.Staging == git.Deleted {
  102. // file in staging has been deleted, aka it is not on the filesystem
  103. // so the contents of the file are ""
  104. currFileContents = ""
  105. } else {
  106. workTreeBuf := bytes.NewBuffer(nil)
  107. workTreeFile, err := wt.Filesystem.Open(fn)
  108. if err != nil {
  109. continue
  110. }
  111. if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
  112. return err
  113. }
  114. currFileContents = workTreeBuf.String()
  115. filename = workTreeFile.Name()
  116. }
  117. // get files at HEAD state
  118. prevFile, err := prevTree.File(fn)
  119. if err != nil {
  120. prevFileContents = ""
  121. } else {
  122. prevFileContents, err = prevFile.Contents()
  123. if err != nil {
  124. return err
  125. }
  126. if filename == "" {
  127. filename = prevFile.Name
  128. }
  129. }
  130. dmp := diffmatchpatch.New()
  131. diffs := dmp.DiffMain(prevFileContents, currFileContents, false)
  132. var diffContents string
  133. for _, d := range diffs {
  134. switch d.Type {
  135. case diffmatchpatch.DiffInsert:
  136. diffContents += fmt.Sprintf("%s\n", d.Text)
  137. case diffmatchpatch.DiffDelete:
  138. diffContents += fmt.Sprintf("%s\n", d.Text)
  139. }
  140. }
  141. InspectString(diffContents, c, repo, filename)
  142. }
  143. }
  144. if err != nil {
  145. return err
  146. }
  147. repo.Manager.RecordTime(manager.AuditTime(howLong(auditTimeStart)))
  148. return nil
  149. }
  150. // Audit is responsible for scanning the entire history (default behavior) of a
  151. // git repo. Options that can change the behavior of this function include: --commit, --depth, --branch.
  152. // See options/options.go for an explanation on these options.
  153. func (repo *Repo) Audit() error {
  154. if repo.Repository == nil {
  155. return fmt.Errorf("%s repo is empty", repo.Name)
  156. }
  157. // load up alternative config if possible, if not use manager's config
  158. if repo.Manager.Opts.RepoConfig {
  159. cfg, err := repo.loadRepoConfig()
  160. if err != nil {
  161. return err
  162. }
  163. repo.config = cfg
  164. }
  165. auditTimeStart := time.Now()
  166. // audit single Commit
  167. if repo.Manager.Opts.Commit != "" {
  168. h := plumbing.NewHash(repo.Manager.Opts.Commit)
  169. c, err := repo.CommitObject(h)
  170. if err != nil {
  171. return err
  172. }
  173. err = inspectCommit(c, repo)
  174. if err != nil {
  175. return err
  176. }
  177. return nil
  178. }
  179. logOpts, err := getLogOptions(repo)
  180. if err != nil {
  181. return err
  182. }
  183. cIter, err := repo.Log(logOpts)
  184. if err != nil {
  185. return err
  186. }
  187. //checker := make(map[string]bool)
  188. cc := 0
  189. semaphore := make(chan bool, howManyThreads(repo.Manager.Opts.Threads))
  190. wg := sync.WaitGroup{}
  191. err = cIter.ForEach(func(c *object.Commit) error {
  192. if c == nil {
  193. return storer.ErrStop
  194. }
  195. if len(c.ParentHashes) == 0 {
  196. cc++
  197. err = inspectCommit(c, repo)
  198. if err != nil {
  199. return err
  200. }
  201. return nil
  202. }
  203. // TODO check whitelist Commit
  204. if isCommitWhiteListed(c.Hash.String(), repo.config.Whitelist.Commits) {
  205. return nil
  206. }
  207. cc++
  208. err = c.Parents().ForEach(func(parent *object.Commit) error {
  209. start := time.Now()
  210. patch, err := c.Patch(parent)
  211. if err != nil {
  212. return fmt.Errorf("could not generate patch")
  213. }
  214. repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
  215. wg.Add(1)
  216. semaphore <- true
  217. go func(c *object.Commit, patch *object.Patch) {
  218. defer func() {
  219. <-semaphore
  220. wg.Done()
  221. }()
  222. inspectPatch(patch, c, repo)
  223. }(c, patch)
  224. return nil
  225. })
  226. return nil
  227. })
  228. wg.Wait()
  229. repo.Manager.RecordTime(manager.AuditTime(howLong(auditTimeStart)))
  230. repo.Manager.IncrementCommits(cc)
  231. return nil
  232. }
  233. // Open opens a local repo either from repo-path or $PWD
  234. func (repo *Repo) Open() error {
  235. if repo.Manager.Opts.RepoPath != "" {
  236. // open git repo from repo path
  237. repository, err := git.PlainOpen(repo.Manager.Opts.RepoPath)
  238. if err != nil {
  239. return err
  240. }
  241. repo.Repository = repository
  242. } else {
  243. // open git repo from PWD
  244. dir, err := os.Getwd()
  245. if err != nil {
  246. return err
  247. }
  248. repository, err := git.PlainOpen(dir)
  249. if err != nil {
  250. return err
  251. }
  252. repo.Repository = repository
  253. }
  254. return nil
  255. }
  256. func (repo *Repo) loadRepoConfig() (config.Config, error) {
  257. wt, err := repo.Repository.Worktree()
  258. if err != nil {
  259. return config.Config{}, err
  260. }
  261. var f billy.File
  262. f, _ = wt.Filesystem.Open(".gitleaks.toml")
  263. if f == nil {
  264. f, err = wt.Filesystem.Open("gitleaks.toml")
  265. if err != nil {
  266. return config.Config{}, fmt.Errorf("problem loading repo config: %v", err)
  267. }
  268. }
  269. defer f.Close()
  270. var tomlLoader config.TomlLoader
  271. _, err = toml.DecodeReader(f, &tomlLoader)
  272. return tomlLoader.Parse()
  273. }