repo.go 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. package scan
  2. import (
  3. "context"
  4. "crypto/md5"
  5. "fmt"
  6. "io/ioutil"
  7. "os"
  8. "path"
  9. "path/filepath"
  10. "runtime"
  11. "time"
  12. "github.com/zricethezav/gitleaks/v5/config"
  13. "github.com/zricethezav/gitleaks/v5/manager"
  14. "github.com/BurntSushi/toml"
  15. "github.com/go-git/go-billy/v5"
  16. "github.com/go-git/go-git/v5"
  17. "github.com/go-git/go-git/v5/plumbing"
  18. "github.com/go-git/go-git/v5/storage/memory"
  19. log "github.com/sirupsen/logrus"
  20. )
  21. // Repo wraps a *git.Repository object in addition to a manager object and the name of the repo.
  22. // Commits are inspected from the *git.Repository object. If a Commit is found then we send it
  23. // via the manager LeakChan where the manager receives and keeps track of all leaks.
  24. type Repo struct {
  25. *git.Repository
  26. // config is used when the --repo-config option is set.
  27. // This allows users to load up configs specific to their repos.
  28. // Imagine the scenario where you are doing an scan of a large organization
  29. // and you want certain repos to look for specific rules. If those specific repos
  30. // have a gitleaks.toml or .gitleaks.toml config then those configs will be used specifically
  31. // for those repo scans.
  32. config config.Config
  33. // ctx is used to signal timeouts to running goroutines
  34. ctx context.Context
  35. cancel context.CancelFunc
  36. Name string
  37. Manager *manager.Manager
  38. }
  39. // NewRepo initializes and returns a Repo struct.
  40. func NewRepo(m *manager.Manager) *Repo {
  41. return &Repo{
  42. Manager: m,
  43. config: m.Config,
  44. ctx: context.Background(),
  45. }
  46. }
  47. // Run accepts a manager and begins an scan based on the options/configs set in the manager.
  48. func Run(m *manager.Manager) error {
  49. if m.Opts.OwnerPath != "" {
  50. files, err := ioutil.ReadDir(m.Opts.OwnerPath)
  51. if err != nil {
  52. return err
  53. }
  54. for _, f := range files {
  55. if !f.IsDir() {
  56. continue
  57. }
  58. m.Opts.RepoPath = fmt.Sprintf("%s/%s", m.Opts.OwnerPath, f.Name())
  59. if err := runHelper(NewRepo(m)); err != nil {
  60. log.Warnf("%s is not a git repo, skipping", f.Name())
  61. }
  62. }
  63. return nil
  64. }
  65. return runHelper(NewRepo(m))
  66. }
  67. func runHelper(r *Repo) error {
  68. // Ignore allowlisted repos
  69. for _, wlRepo := range r.Manager.Config.Allowlist.Repos {
  70. if RegexMatched(r.Manager.Opts.RepoPath, wlRepo) {
  71. return nil
  72. }
  73. if RegexMatched(r.Manager.Opts.Repo, wlRepo) {
  74. return nil
  75. }
  76. }
  77. if r.Manager.Opts.OpenLocal() {
  78. r.Name = path.Base(r.Manager.Opts.RepoPath)
  79. if err := r.Open(); err != nil {
  80. return err
  81. }
  82. // Check if we are checking uncommitted files. This is the default behavior
  83. // for a "$ gitleaks" command with no options set
  84. if r.Manager.Opts.CheckUncommitted() {
  85. if err := r.scanUncommitted(); err != nil {
  86. return err
  87. }
  88. return nil
  89. }
  90. } else {
  91. if err := r.Clone(nil); err != nil {
  92. return err
  93. }
  94. }
  95. return r.Scan()
  96. }
  97. // Clone will clone a repo and return a Repo struct which contains a go-git repo. The clone method
  98. // is determined by the clone options set in Manager.metadata.cloneOptions
  99. func (repo *Repo) Clone(cloneOption *git.CloneOptions) error {
  100. var (
  101. repository *git.Repository
  102. err error
  103. )
  104. if cloneOption == nil {
  105. cloneOption = repo.Manager.CloneOptions
  106. }
  107. log.Infof("cloning... %s", cloneOption.URL)
  108. start := time.Now()
  109. if repo.Manager.CloneDir != "" {
  110. clonePath := fmt.Sprintf("%s/%x", repo.Manager.CloneDir, md5.Sum([]byte(time.Now().String())))
  111. repository, err = git.PlainClone(clonePath, false, cloneOption)
  112. } else {
  113. repository, err = git.Clone(memory.NewStorage(), nil, cloneOption)
  114. }
  115. if err != nil {
  116. return err
  117. }
  118. repo.Name = filepath.Base(repo.Manager.Opts.Repo)
  119. repo.Repository = repository
  120. repo.Manager.RecordTime(manager.CloneTime(howLong(start)))
  121. return nil
  122. }
  123. // howManyThreads will return a number 1-GOMAXPROCS which is the number
  124. // of goroutines that will spawn during gitleaks execution
  125. func howManyThreads(threads int) int {
  126. maxThreads := runtime.GOMAXPROCS(0)
  127. if threads == 0 {
  128. return 1
  129. } else if threads > maxThreads {
  130. log.Warnf("%d threads set too high, setting to system max, %d", threads, maxThreads)
  131. return maxThreads
  132. }
  133. return threads
  134. }
  135. // getLogOptions determines what log options are used when iterating through commits.
  136. // It is similar to `git log {branch}`. Default behavior is to log ALL branches so
  137. // gitleaks gets the full git history.
  138. func getLogOptions(repo *Repo) (*git.LogOptions, error) {
  139. var logOpts git.LogOptions
  140. const dateformat string = "2006-01-02"
  141. const timeformat string = "2006-01-02T15:04:05-0700"
  142. if repo.Manager.Opts.CommitFrom != "" {
  143. logOpts.From = plumbing.NewHash(repo.Manager.Opts.CommitFrom)
  144. }
  145. if repo.Manager.Opts.CommitSince != "" {
  146. if t, err := time.Parse(timeformat, repo.Manager.Opts.CommitSince); err == nil {
  147. logOpts.Since = &t
  148. } else if t, err := time.Parse(dateformat, repo.Manager.Opts.CommitSince); err == nil {
  149. logOpts.Since = &t
  150. } else {
  151. return nil, err
  152. }
  153. }
  154. if repo.Manager.Opts.CommitUntil != "" {
  155. if t, err := time.Parse(timeformat, repo.Manager.Opts.CommitUntil); err == nil {
  156. logOpts.Until = &t
  157. } else if t, err := time.Parse(dateformat, repo.Manager.Opts.CommitUntil); err == nil {
  158. logOpts.Until = &t
  159. } else {
  160. return nil, err
  161. }
  162. }
  163. if repo.Manager.Opts.Branch != "" {
  164. refs, err := repo.Storer.IterReferences()
  165. if err != nil {
  166. return nil, err
  167. }
  168. err = refs.ForEach(func(ref *plumbing.Reference) error {
  169. if ref.Name().IsTag() {
  170. return nil
  171. }
  172. // check heads first
  173. if ref.Name().String() == "refs/heads/"+repo.Manager.Opts.Branch {
  174. logOpts = git.LogOptions{
  175. From: ref.Hash(),
  176. }
  177. return nil
  178. } else if ref.Name().String() == "refs/remotes/origin/"+repo.Manager.Opts.Branch {
  179. logOpts = git.LogOptions{
  180. From: ref.Hash(),
  181. }
  182. return nil
  183. }
  184. return nil
  185. })
  186. if logOpts.From.IsZero() {
  187. return nil, fmt.Errorf("could not find branch %s", repo.Manager.Opts.Branch)
  188. }
  189. return &logOpts, nil
  190. }
  191. if !logOpts.From.IsZero() || logOpts.Since != nil || logOpts.Until != nil {
  192. return &logOpts, nil
  193. }
  194. return &git.LogOptions{All: true}, nil
  195. }
  // howLong reports how much time has passed since t, expressed in
  // nanoseconds.
  func howLong(t time.Time) int64 {
  	elapsed := time.Since(t)
  	return elapsed.Nanoseconds()
  }
  201. // Open opens a local repo either from repo-path or $PWD
  202. func (repo *Repo) Open() error {
  203. if repo.Manager.Opts.RepoPath != "" {
  204. // open git repo from repo path
  205. repository, err := git.PlainOpen(repo.Manager.Opts.RepoPath)
  206. if err != nil {
  207. return err
  208. }
  209. repo.Repository = repository
  210. } else {
  211. // open git repo from PWD
  212. dir, err := os.Getwd()
  213. if err != nil {
  214. return err
  215. }
  216. repository, err := git.PlainOpen(dir)
  217. if err != nil {
  218. return err
  219. }
  220. repo.Repository = repository
  221. repo.Name = path.Base(dir)
  222. }
  223. return nil
  224. }
  225. func (repo *Repo) loadRepoConfig() (config.Config, error) {
  226. wt, err := repo.Repository.Worktree()
  227. if err != nil {
  228. return config.Config{}, err
  229. }
  230. var f billy.File
  231. f, _ = wt.Filesystem.Open(".gitleaks.toml")
  232. if f == nil {
  233. f, err = wt.Filesystem.Open("gitleaks.toml")
  234. if err != nil {
  235. return config.Config{}, fmt.Errorf("problem loading repo config: %v", err)
  236. }
  237. }
  238. defer f.Close()
  239. var tomlLoader config.TomlLoader
  240. _, err = toml.DecodeReader(f, &tomlLoader)
  241. return tomlLoader.Parse()
  242. }
  243. // timeoutReached returns true if the timeout deadline has been met. This function should be used
  244. // at the top of loops and before potentially long running goroutines (like checking inefficient regexes)
  245. func (repo *Repo) timeoutReached() bool {
  246. if repo.ctx.Err() == context.DeadlineExceeded {
  247. return true
  248. }
  249. return false
  250. }
  251. // setupTimeout parses the --timeout option and assigns a context with timeout to the manager
  252. // which will exit early if the timeout has been met.
  253. func (repo *Repo) setupTimeout() error {
  254. if repo.Manager.Opts.Timeout == "" {
  255. return nil
  256. }
  257. timeout, err := time.ParseDuration(repo.Manager.Opts.Timeout)
  258. if err != nil {
  259. return err
  260. }
  261. repo.ctx, repo.cancel = context.WithTimeout(context.Background(), timeout)
  262. go func() {
  263. select {
  264. case <-repo.ctx.Done():
  265. if repo.timeoutReached() {
  266. log.Warnf("Timeout deadline (%s) exceeded for %s", timeout.String(), repo.Name)
  267. }
  268. }
  269. }()
  270. return nil
  271. }