repo.go 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. package audit
  2. import (
  3. "bytes"
  4. "crypto/md5"
  5. "fmt"
  6. "io"
  7. "os"
  8. "path"
  9. "path/filepath"
  10. "sync"
  11. "time"
  12. "github.com/zricethezav/gitleaks/config"
  13. "github.com/zricethezav/gitleaks/manager"
  14. "github.com/BurntSushi/toml"
  15. "github.com/sergi/go-diff/diffmatchpatch"
  16. log "github.com/sirupsen/logrus"
  17. "gopkg.in/src-d/go-billy.v4"
  18. "gopkg.in/src-d/go-git.v4"
  19. "gopkg.in/src-d/go-git.v4/plumbing"
  20. "gopkg.in/src-d/go-git.v4/plumbing/object"
  21. "gopkg.in/src-d/go-git.v4/plumbing/storer"
  22. "gopkg.in/src-d/go-git.v4/storage/memory"
  23. )
// Repo wraps a *git.Repository object in addition to a manager object and the name of the repo.
// Commits are inspected from the *git.Repository object. If a leak is found then it is sent
// to the manager (see the Manager.SendLeaks call in AuditUncommitted), which receives and keeps
// track of all leaks.
type Repo struct {
	// Embedded go-git repository handle. NewRepo leaves it nil; Clone and Open
	// assign it on success.
	*git.Repository

	// config is used when the --repo-config option is set.
	// This allows users to load up configs specific to their repos.
	// Imagine the scenario where you are doing an audit of a large organization
	// and you want certain repos to look for specific rules. If those specific repos
	// have a gitleaks.toml or .gitleaks.toml config then those configs will be used specifically
	// for those repo audits.
	// NewRepo initializes it from the manager's config.
	config config.Config
	// Name is the repo name attached to reported leaks. Clone derives it from
	// Manager.Opts.Repo and Open derives it from the working directory.
	Name string
	// Manager supplies the options, clone settings and timing/leak recording
	// used by the methods of Repo.
	Manager *manager.Manager
}
  39. // NewRepo initializes and returns a Repo struct.
  40. func NewRepo(m *manager.Manager) *Repo {
  41. return &Repo{
  42. Manager: m,
  43. config: m.Config,
  44. }
  45. }
  46. // Clone will clone a repo and return a Repo struct which contains a go-git repo. The clone method
  47. // is determined by the clone options set in Manager.metadata.cloneOptions
  48. func (repo *Repo) Clone(cloneOption *git.CloneOptions) error {
  49. var (
  50. repository *git.Repository
  51. err error
  52. )
  53. if cloneOption == nil {
  54. cloneOption = repo.Manager.CloneOptions
  55. }
  56. log.Infof("cloning... %s", cloneOption.URL)
  57. start := time.Now()
  58. if repo.Manager.CloneDir != "" {
  59. clonePath := fmt.Sprintf("%s/%x", repo.Manager.CloneDir, md5.Sum([]byte(time.Now().String())))
  60. repository, err = git.PlainClone(clonePath, false, cloneOption)
  61. } else {
  62. repository, err = git.Clone(memory.NewStorage(), nil, cloneOption)
  63. }
  64. if err != nil {
  65. return err
  66. }
  67. repo.Name = filepath.Base(repo.Manager.Opts.Repo)
  68. repo.Repository = repository
  69. repo.Manager.RecordTime(manager.CloneTime(howLong(start)))
  70. return nil
  71. }
  72. // AuditUncommitted will do a `git diff` and scan changed files that are being tracked. This is useful functionality
  73. // for a pre-commit hook so you can make sure your code does not have any leaks before committing.
  74. func (repo *Repo) AuditUncommitted() error {
  75. // load up alternative config if possible, if not use manager's config
  76. if repo.Manager.Opts.RepoConfig {
  77. cfg, err := repo.loadRepoConfig()
  78. if err != nil {
  79. return err
  80. }
  81. repo.config = cfg
  82. }
  83. auditTimeStart := time.Now()
  84. r, err := repo.Head()
  85. if err != nil {
  86. return err
  87. }
  88. c, err := repo.CommitObject(r.Hash())
  89. if err != nil {
  90. return err
  91. }
  92. // Staged change so the commit details do not yet exist. Insert empty defaults.
  93. c.Hash = plumbing.Hash{}
  94. c.Message = "***STAGED CHANGES***"
  95. c.Author.Name = ""
  96. c.Author.Email = ""
  97. c.Author.When = time.Unix(0, 0).UTC()
  98. prevTree, err := c.Tree()
  99. if err != nil {
  100. return err
  101. }
  102. wt, err := repo.Worktree()
  103. if err != nil {
  104. return err
  105. }
  106. status, err := wt.Status()
  107. for fn, state := range status {
  108. var (
  109. prevFileContents string
  110. currFileContents string
  111. filename string
  112. )
  113. if state.Staging != git.Untracked {
  114. if state.Staging == git.Deleted {
  115. // file in staging has been deleted, aka it is not on the filesystem
  116. // so the contents of the file are ""
  117. currFileContents = ""
  118. } else {
  119. workTreeBuf := bytes.NewBuffer(nil)
  120. workTreeFile, err := wt.Filesystem.Open(fn)
  121. if err != nil {
  122. continue
  123. }
  124. if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
  125. return err
  126. }
  127. currFileContents = workTreeBuf.String()
  128. filename = workTreeFile.Name()
  129. }
  130. // get files at HEAD state
  131. prevFile, err := prevTree.File(fn)
  132. if err != nil {
  133. prevFileContents = ""
  134. } else {
  135. prevFileContents, err = prevFile.Contents()
  136. if err != nil {
  137. return err
  138. }
  139. if filename == "" {
  140. filename = prevFile.Name
  141. }
  142. }
  143. if fileMatched(filename, repo.config.Whitelist.File) {
  144. log.Debugf("whitelisted file found, skipping audit of file: %s", filename)
  145. } else if fileMatched(filename, repo.config.FileRegex) {
  146. repo.Manager.SendLeaks(manager.Leak{
  147. Line: "N/A",
  148. Offender: filename,
  149. Commit: c.Hash.String(),
  150. Repo: repo.Name,
  151. Rule: "file regex matched" + repo.config.FileRegex.String(),
  152. Message: c.Message,
  153. Author: c.Author.Name,
  154. Email: c.Author.Email,
  155. Date: c.Author.When,
  156. File: filename,
  157. })
  158. } else {
  159. dmp := diffmatchpatch.New()
  160. diffs := dmp.DiffMain(prevFileContents, currFileContents, false)
  161. var diffContents string
  162. for _, d := range diffs {
  163. switch d.Type {
  164. case diffmatchpatch.DiffInsert:
  165. diffContents += fmt.Sprintf("%s\n", d.Text)
  166. case diffmatchpatch.DiffDelete:
  167. diffContents += fmt.Sprintf("%s\n", d.Text)
  168. }
  169. }
  170. InspectString(diffContents, c, repo, filename)
  171. }
  172. }
  173. }
  174. if err != nil {
  175. return err
  176. }
  177. repo.Manager.RecordTime(manager.AuditTime(howLong(auditTimeStart)))
  178. return nil
  179. }
  180. // Audit is responsible for scanning the entire history (default behavior) of a
  181. // git repo. Options that can change the behavior of this function include: --commit, --depth, --branch.
  182. // See options/options.go for an explanation on these options.
  183. func (repo *Repo) Audit() error {
  184. if repo.Repository == nil {
  185. return fmt.Errorf("%s repo is empty", repo.Name)
  186. }
  187. // load up alternative config if possible, if not use manager's config
  188. if repo.Manager.Opts.RepoConfig {
  189. cfg, err := repo.loadRepoConfig()
  190. if err != nil {
  191. return err
  192. }
  193. repo.config = cfg
  194. }
  195. auditTimeStart := time.Now()
  196. // audit single Commit
  197. if repo.Manager.Opts.Commit != "" {
  198. h := plumbing.NewHash(repo.Manager.Opts.Commit)
  199. c, err := repo.CommitObject(h)
  200. if err != nil {
  201. return err
  202. }
  203. err = inspectCommit(c, repo)
  204. if err != nil {
  205. return err
  206. }
  207. return nil
  208. }
  209. logOpts, err := getLogOptions(repo)
  210. if err != nil {
  211. return err
  212. }
  213. cIter, err := repo.Log(logOpts)
  214. if err != nil {
  215. return err
  216. }
  217. cc := 0
  218. semaphore := make(chan bool, howManyThreads(repo.Manager.Opts.Threads))
  219. wg := sync.WaitGroup{}
  220. err = cIter.ForEach(func(c *object.Commit) error {
  221. if c == nil {
  222. return storer.ErrStop
  223. }
  224. if len(c.ParentHashes) == 0 {
  225. cc++
  226. err = inspectCommit(c, repo)
  227. if err != nil {
  228. return err
  229. }
  230. return nil
  231. }
  232. if isCommitWhiteListed(c.Hash.String(), repo.config.Whitelist.Commits) {
  233. return nil
  234. }
  235. cc++
  236. err = c.Parents().ForEach(func(parent *object.Commit) error {
  237. defer func() {
  238. if err := recover(); err != nil {
  239. // sometimes the patch generation will fail due to a known bug in
  240. // sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
  241. // Once a fix has been merged I will remove this recover.
  242. return
  243. }
  244. }()
  245. start := time.Now()
  246. patch, err := c.Patch(parent)
  247. if err != nil {
  248. return fmt.Errorf("could not generate patch")
  249. }
  250. repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
  251. wg.Add(1)
  252. semaphore <- true
  253. go func(c *object.Commit, patch *object.Patch) {
  254. defer func() {
  255. <-semaphore
  256. wg.Done()
  257. }()
  258. inspectPatch(patch, c, repo)
  259. }(c, patch)
  260. return nil
  261. })
  262. return nil
  263. })
  264. wg.Wait()
  265. repo.Manager.RecordTime(manager.AuditTime(howLong(auditTimeStart)))
  266. repo.Manager.IncrementCommits(cc)
  267. return nil
  268. }
  269. // Open opens a local repo either from repo-path or $PWD
  270. func (repo *Repo) Open() error {
  271. if repo.Manager.Opts.RepoPath != "" {
  272. // open git repo from repo path
  273. repository, err := git.PlainOpen(repo.Manager.Opts.RepoPath)
  274. if err != nil {
  275. return err
  276. }
  277. repo.Repository = repository
  278. } else {
  279. // open git repo from PWD
  280. dir, err := os.Getwd()
  281. if err != nil {
  282. return err
  283. }
  284. repository, err := git.PlainOpen(dir)
  285. if err != nil {
  286. return err
  287. }
  288. repo.Repository = repository
  289. repo.Name = path.Base(dir)
  290. }
  291. return nil
  292. }
  293. func (repo *Repo) loadRepoConfig() (config.Config, error) {
  294. wt, err := repo.Repository.Worktree()
  295. if err != nil {
  296. return config.Config{}, err
  297. }
  298. var f billy.File
  299. f, _ = wt.Filesystem.Open(".gitleaks.toml")
  300. if f == nil {
  301. f, err = wt.Filesystem.Open("gitleaks.toml")
  302. if err != nil {
  303. return config.Config{}, fmt.Errorf("problem loading repo config: %v", err)
  304. }
  305. }
  306. defer f.Close()
  307. var tomlLoader config.TomlLoader
  308. _, err = toml.DecodeReader(f, &tomlLoader)
  309. return tomlLoader.Parse()
  310. }