repo.go 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. package audit
  2. import (
  3. "bytes"
  4. "context"
  5. "crypto/md5"
  6. "fmt"
  7. "io"
  8. "os"
  9. "path"
  10. "path/filepath"
  11. "sync"
  12. "time"
  13. "github.com/zricethezav/gitleaks/config"
  14. "github.com/zricethezav/gitleaks/manager"
  15. "github.com/BurntSushi/toml"
  16. "github.com/sergi/go-diff/diffmatchpatch"
  17. log "github.com/sirupsen/logrus"
  18. "gopkg.in/src-d/go-billy.v4"
  19. "gopkg.in/src-d/go-git.v4"
  20. "gopkg.in/src-d/go-git.v4/plumbing"
  21. "gopkg.in/src-d/go-git.v4/plumbing/object"
  22. "gopkg.in/src-d/go-git.v4/plumbing/storer"
  23. "gopkg.in/src-d/go-git.v4/storage/memory"
  24. )
// Repo wraps a *git.Repository object in addition to a manager object and the name of the repo.
// Commits and uncommitted changes are inspected through the embedded *git.Repository, and any
// leak that is found is reported to the manager via Manager.SendLeaks.
type Repo struct {
	// Repository is the embedded go-git handle. NewRepo leaves it nil; Clone and Open assign it.
	*git.Repository

	// config is used when the --repo-config option is set.
	// This allows users to load up configs specific to their repos.
	// Imagine the scenario where you are doing an audit of a large organization
	// and you want certain repos to look for specific rules. If those specific repos
	// have a gitleaks.toml or .gitleaks.toml config then those configs will be used specifically
	// for those repo audits. NewRepo seeds it with the manager's config.
	config config.Config

	// ctx carries the --timeout deadline. It is assigned by setupTimeout and read by timeoutReached.
	ctx context.Context

	// Name is the repo name attached to reported leaks. Clone sets it to the base of Opts.Repo;
	// Open sets it to the base of the working directory when no repo path is given.
	Name string

	// Manager supplies the options, records timings and receives leaks.
	Manager *manager.Manager
}
  41. // NewRepo initializes and returns a Repo struct.
  42. func NewRepo(m *manager.Manager) *Repo {
  43. return &Repo{
  44. Manager: m,
  45. config: m.Config,
  46. }
  47. }
  48. // Clone will clone a repo and return a Repo struct which contains a go-git repo. The clone method
  49. // is determined by the clone options set in Manager.metadata.cloneOptions
  50. func (repo *Repo) Clone(cloneOption *git.CloneOptions) error {
  51. var (
  52. repository *git.Repository
  53. err error
  54. )
  55. if cloneOption == nil {
  56. cloneOption = repo.Manager.CloneOptions
  57. }
  58. log.Infof("cloning... %s", cloneOption.URL)
  59. start := time.Now()
  60. if repo.Manager.CloneDir != "" {
  61. clonePath := fmt.Sprintf("%s/%x", repo.Manager.CloneDir, md5.Sum([]byte(time.Now().String())))
  62. repository, err = git.PlainClone(clonePath, false, cloneOption)
  63. } else {
  64. repository, err = git.Clone(memory.NewStorage(), nil, cloneOption)
  65. }
  66. if err != nil {
  67. return err
  68. }
  69. repo.Name = filepath.Base(repo.Manager.Opts.Repo)
  70. repo.Repository = repository
  71. repo.Manager.RecordTime(manager.CloneTime(howLong(start)))
  72. return nil
  73. }
  74. // AuditUncommitted will do a `git diff` and scan changed files that are being tracked. This is useful functionality
  75. // for a pre-commit hook so you can make sure your code does not have any leaks before committing.
  76. func (repo *Repo) AuditUncommitted() error {
  77. // load up alternative config if possible, if not use manager's config
  78. if repo.Manager.Opts.RepoConfig {
  79. cfg, err := repo.loadRepoConfig()
  80. if err != nil {
  81. return err
  82. }
  83. repo.config = cfg
  84. }
  85. auditTimeStart := time.Now()
  86. r, err := repo.Head()
  87. if err != nil {
  88. return err
  89. }
  90. c, err := repo.CommitObject(r.Hash())
  91. if err != nil {
  92. return err
  93. }
  94. // Staged change so the commit details do not yet exist. Insert empty defaults.
  95. c.Hash = plumbing.Hash{}
  96. c.Message = "***STAGED CHANGES***"
  97. c.Author.Name = ""
  98. c.Author.Email = ""
  99. c.Author.When = time.Unix(0, 0).UTC()
  100. prevTree, err := c.Tree()
  101. if err != nil {
  102. return err
  103. }
  104. wt, err := repo.Worktree()
  105. if err != nil {
  106. return err
  107. }
  108. status, err := wt.Status()
  109. for fn, state := range status {
  110. var (
  111. prevFileContents string
  112. currFileContents string
  113. filename string
  114. )
  115. if state.Staging != git.Untracked {
  116. if state.Staging == git.Deleted {
  117. // file in staging has been deleted, aka it is not on the filesystem
  118. // so the contents of the file are ""
  119. currFileContents = ""
  120. } else {
  121. workTreeBuf := bytes.NewBuffer(nil)
  122. workTreeFile, err := wt.Filesystem.Open(fn)
  123. if err != nil {
  124. continue
  125. }
  126. if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
  127. return err
  128. }
  129. currFileContents = workTreeBuf.String()
  130. filename = workTreeFile.Name()
  131. }
  132. // get files at HEAD state
  133. prevFile, err := prevTree.File(fn)
  134. if err != nil {
  135. prevFileContents = ""
  136. } else {
  137. prevFileContents, err = prevFile.Contents()
  138. if err != nil {
  139. return err
  140. }
  141. if filename == "" {
  142. filename = prevFile.Name
  143. }
  144. }
  145. if fileMatched(filename, repo.config.Whitelist.File) {
  146. log.Debugf("whitelisted file found, skipping audit of file: %s", filename)
  147. } else if fileMatched(filename, repo.config.FileRegex) {
  148. repo.Manager.SendLeaks(manager.Leak{
  149. Line: "N/A",
  150. Offender: filename,
  151. Commit: c.Hash.String(),
  152. Repo: repo.Name,
  153. Rule: "file regex matched" + repo.config.FileRegex.String(),
  154. Message: c.Message,
  155. Author: c.Author.Name,
  156. Email: c.Author.Email,
  157. Date: c.Author.When,
  158. File: filename,
  159. })
  160. } else {
  161. dmp := diffmatchpatch.New()
  162. diffs := dmp.DiffMain(prevFileContents, currFileContents, false)
  163. var diffContents string
  164. for _, d := range diffs {
  165. switch d.Type {
  166. case diffmatchpatch.DiffInsert:
  167. diffContents += fmt.Sprintf("%s\n", d.Text)
  168. case diffmatchpatch.DiffDelete:
  169. diffContents += fmt.Sprintf("%s\n", d.Text)
  170. }
  171. }
  172. InspectString(diffContents, c, repo, filename)
  173. }
  174. }
  175. }
  176. if err != nil {
  177. return err
  178. }
  179. repo.Manager.RecordTime(manager.AuditTime(howLong(auditTimeStart)))
  180. return nil
  181. }
  182. // timeoutReached returns true if the timeout deadline has been met. This function should be used
  183. // at the top of loops and before potentially long running goroutines (like checking inefficient regexes)
  184. func (repo *Repo) timeoutReached() bool {
  185. if repo.ctx.Err() == context.DeadlineExceeded {
  186. return true
  187. }
  188. return false
  189. }
  190. // setupTimeout parses the --timeout option and assigns a context with timeout to the manager
  191. // which will exit early if the timeout has been met.
  192. func (repo *Repo) setupTimeout() error {
  193. if repo.Manager.Opts.Timeout == "" {
  194. return nil
  195. }
  196. timeout, err := time.ParseDuration(repo.Manager.Opts.Timeout)
  197. if err != nil {
  198. return err
  199. }
  200. repo.ctx, _ = context.WithTimeout(context.Background(), timeout)
  201. go func() {
  202. select {
  203. case <-repo.ctx.Done():
  204. log.Warnf("Timeout deadline exceeded: %s", timeout.String())
  205. }
  206. }()
  207. return nil
  208. }
  209. // Audit is responsible for scanning the entire history (default behavior) of a
  210. // git repo. Options that can change the behavior of this function include: --commit, --depth, --branch.
  211. // See options/options.go for an explanation on these options.
  212. func (repo *Repo) Audit() error {
  213. if err := repo.setupTimeout(); err != nil {
  214. return err
  215. }
  216. if repo.Repository == nil {
  217. return fmt.Errorf("%s repo is empty", repo.Name)
  218. }
  219. // load up alternative config if possible, if not use manager's config
  220. if repo.Manager.Opts.RepoConfig {
  221. cfg, err := repo.loadRepoConfig()
  222. if err != nil {
  223. return err
  224. }
  225. repo.config = cfg
  226. }
  227. auditTimeStart := time.Now()
  228. // audit single Commit
  229. if repo.Manager.Opts.Commit != "" {
  230. h := plumbing.NewHash(repo.Manager.Opts.Commit)
  231. c, err := repo.CommitObject(h)
  232. if err != nil {
  233. return err
  234. }
  235. err = inspectCommit(c, repo)
  236. if err != nil {
  237. return err
  238. }
  239. return nil
  240. }
  241. logOpts, err := getLogOptions(repo)
  242. if err != nil {
  243. return err
  244. }
  245. cIter, err := repo.Log(logOpts)
  246. if err != nil {
  247. return err
  248. }
  249. cc := 0
  250. semaphore := make(chan bool, howManyThreads(repo.Manager.Opts.Threads))
  251. wg := sync.WaitGroup{}
  252. err = cIter.ForEach(func(c *object.Commit) error {
  253. if c == nil || c.Hash.String() == repo.Manager.Opts.CommitTo || repo.timeoutReached() {
  254. return storer.ErrStop
  255. }
  256. if len(c.ParentHashes) == 0 {
  257. cc++
  258. err = inspectCommit(c, repo)
  259. if err != nil {
  260. return err
  261. }
  262. return nil
  263. }
  264. if isCommitWhiteListed(c.Hash.String(), repo.config.Whitelist.Commits) {
  265. return nil
  266. }
  267. cc++
  268. err = c.Parents().ForEach(func(parent *object.Commit) error {
  269. defer func() {
  270. if err := recover(); err != nil {
  271. // sometimes the patch generation will fail due to a known bug in
  272. // sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
  273. // Once a fix has been merged I will remove this recover.
  274. return
  275. }
  276. }()
  277. if repo.timeoutReached() {
  278. return nil
  279. }
  280. start := time.Now()
  281. patch, err := c.Patch(parent)
  282. if err != nil {
  283. return fmt.Errorf("could not generate patch")
  284. }
  285. repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
  286. wg.Add(1)
  287. semaphore <- true
  288. go func(c *object.Commit, patch *object.Patch) {
  289. defer func() {
  290. <-semaphore
  291. wg.Done()
  292. }()
  293. inspectPatch(patch, c, repo)
  294. }(c, patch)
  295. return nil
  296. })
  297. return nil
  298. })
  299. wg.Wait()
  300. repo.Manager.RecordTime(manager.AuditTime(howLong(auditTimeStart)))
  301. repo.Manager.IncrementCommits(cc)
  302. return nil
  303. }
  304. // Open opens a local repo either from repo-path or $PWD
  305. func (repo *Repo) Open() error {
  306. if repo.Manager.Opts.RepoPath != "" {
  307. // open git repo from repo path
  308. repository, err := git.PlainOpen(repo.Manager.Opts.RepoPath)
  309. if err != nil {
  310. return err
  311. }
  312. repo.Repository = repository
  313. } else {
  314. // open git repo from PWD
  315. dir, err := os.Getwd()
  316. if err != nil {
  317. return err
  318. }
  319. repository, err := git.PlainOpen(dir)
  320. if err != nil {
  321. return err
  322. }
  323. repo.Repository = repository
  324. repo.Name = path.Base(dir)
  325. }
  326. return nil
  327. }
  328. func (repo *Repo) loadRepoConfig() (config.Config, error) {
  329. wt, err := repo.Repository.Worktree()
  330. if err != nil {
  331. return config.Config{}, err
  332. }
  333. var f billy.File
  334. f, _ = wt.Filesystem.Open(".gitleaks.toml")
  335. if f == nil {
  336. f, err = wt.Filesystem.Open("gitleaks.toml")
  337. if err != nil {
  338. return config.Config{}, fmt.Errorf("problem loading repo config: %v", err)
  339. }
  340. }
  341. defer f.Close()
  342. var tomlLoader config.TomlLoader
  343. _, err = toml.DecodeReader(f, &tomlLoader)
  344. return tomlLoader.Parse()
  345. }