main.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. package main
  2. import (
  3. "crypto/md5"
  4. "encoding/csv"
  5. "encoding/json"
  6. "fmt"
  7. "io/ioutil"
  8. "os"
  9. "path"
  10. "path/filepath"
  11. "regexp"
  12. "strings"
  13. "sync"
  14. "time"
  15. "gopkg.in/src-d/go-git.v4/plumbing"
  16. "github.com/google/go-github/github"
  17. "github.com/hako/durafmt"
  18. log "github.com/sirupsen/logrus"
  19. "gopkg.in/src-d/go-git.v4"
  20. diffType "gopkg.in/src-d/go-git.v4/plumbing/format/diff"
  21. "gopkg.in/src-d/go-git.v4/plumbing/object"
  22. "gopkg.in/src-d/go-git.v4/plumbing/storer"
  23. "gopkg.in/src-d/go-git.v4/storage/memory"
  24. )
  25. // Leak represents a leaked secret or regex match.
  26. type Leak struct {
  27. Line string `json:"line"`
  28. Commit string `json:"commit"`
  29. Offender string `json:"offender"`
  30. Type string `json:"reason"`
  31. Message string `json:"commitMsg"`
  32. Author string `json:"author"`
  33. File string `json:"file"`
  34. Repo string `json:"repo"`
  35. Date time.Time `json:"date"`
  36. }
  37. // RepoDescriptor contains a src-d git repository and other data about the repo
  38. type RepoDescriptor struct {
  39. path string
  40. url string
  41. name string
  42. repository *git.Repository
  43. err error
  44. }
  45. type gitDiff struct {
  46. content string
  47. commit *object.Commit
  48. filePath string
  49. repoName string
  50. githubCommit *github.RepositoryCommit
  51. sha string
  52. message string
  53. author string
  54. date time.Time
  55. }
  56. const defaultGithubURL = "https://api.github.com/"
  57. const version = "1.24.0"
  58. const errExit = 2
  59. const leakExit = 1
  60. var (
  61. opts *Options
  62. singleSearchRegex *regexp.Regexp
  63. dir string
  64. threads int
  65. totalCommits int64
  66. commitMap = make(map[string]bool)
  67. cMutex = &sync.Mutex{}
  68. auditDone bool
  69. config *Config
  70. )
  71. func init() {
  72. log.SetOutput(os.Stdout)
  73. // threads = runtime.GOMAXPROCS(0) / 2
  74. threads = 1
  75. }
  76. func main() {
  77. var err error
  78. opts, err = setupOpts()
  79. if err != nil {
  80. log.Fatal(err)
  81. }
  82. config, err = newConfig()
  83. if err != nil {
  84. log.Fatal(err)
  85. }
  86. now := time.Now()
  87. leaks, err := run()
  88. if err != nil {
  89. if strings.Contains(err.Error(), "whitelisted") {
  90. log.Info(err.Error())
  91. os.Exit(0)
  92. }
  93. log.Error(err)
  94. os.Exit(errExit)
  95. }
  96. if opts.Report != "" {
  97. writeReport(leaks)
  98. }
  99. if len(leaks) != 0 {
  100. log.Warnf("%d leaks detected. %d commits inspected in %s", len(leaks), totalCommits, durafmt.Parse(time.Now().Sub(now)).String())
  101. os.Exit(leakExit)
  102. } else {
  103. log.Infof("%d leaks detected. %d commits inspected in %s", len(leaks), totalCommits, durafmt.Parse(time.Now().Sub(now)).String())
  104. }
  105. }
  106. // run parses options and kicks off the audit
  107. func run() ([]Leak, error) {
  108. var (
  109. leaks []Leak
  110. err error
  111. )
  112. if opts.Disk {
  113. // temporary directory where all the gitleaks plain clones will reside
  114. dir, err = ioutil.TempDir("", "gitleaks")
  115. defer os.RemoveAll(dir)
  116. if err != nil {
  117. return nil, err
  118. }
  119. }
  120. fmt.Println(opts)
  121. // start audits
  122. if opts.Repo != "" || opts.RepoPath != "" {
  123. // Audit a single remote repo or a local repo.
  124. repo, err := cloneRepo()
  125. if err != nil {
  126. return leaks, err
  127. }
  128. return auditGitRepo(repo)
  129. } else if opts.OwnerPath != "" {
  130. // Audit local repos. Gitleaks will look for all child directories of OwnerPath for
  131. // git repos and perform an audit on said repos.
  132. repos, err := discoverRepos(opts.OwnerPath)
  133. if err != nil {
  134. return leaks, err
  135. }
  136. for _, repo := range repos {
  137. leaksFromRepo, err := auditGitRepo(repo)
  138. if err != nil {
  139. return leaks, err
  140. }
  141. leaks = append(leaksFromRepo, leaks...)
  142. }
  143. } else if opts.GithubOrg != "" || opts.GithubUser != "" {
  144. // Audit a github owner -- a user or organization.
  145. leaks, err = auditGithubRepos()
  146. if err != nil {
  147. return leaks, err
  148. }
  149. } else if opts.GitLabOrg != "" || opts.GitLabUser != "" {
  150. leaks, err = auditGitlabRepos()
  151. if err != nil {
  152. return leaks, err
  153. }
  154. } else if opts.GithubPR != "" {
  155. return auditGithubPR()
  156. }
  157. return leaks, nil
  158. }
  159. // writeReport writes a report to a file specified in the --report= option.
  160. // Default format for report is JSON. You can use the --csv option to write the report as a csv
  161. func writeReport(leaks []Leak) error {
  162. if len(leaks) == 0 {
  163. return nil
  164. }
  165. var err error
  166. log.Infof("writing report to %s", opts.Report)
  167. if strings.HasSuffix(opts.Report, ".csv") {
  168. f, err := os.Create(opts.Report)
  169. if err != nil {
  170. return err
  171. }
  172. defer f.Close()
  173. w := csv.NewWriter(f)
  174. w.Write([]string{"repo", "line", "commit", "offender", "reason", "commitMsg", "author", "file", "date"})
  175. for _, leak := range leaks {
  176. w.Write([]string{leak.Repo, leak.Line, leak.Commit, leak.Offender, leak.Type, leak.Message, leak.Author, leak.File, leak.Date.Format(time.RFC3339)})
  177. }
  178. w.Flush()
  179. } else {
  180. var (
  181. f *os.File
  182. encoder *json.Encoder
  183. )
  184. f, err := os.Create(opts.Report)
  185. if err != nil {
  186. return err
  187. }
  188. defer f.Close()
  189. encoder = json.NewEncoder(f)
  190. encoder.SetIndent("", "\t")
  191. if _, err := f.WriteString("[\n"); err != nil {
  192. return err
  193. }
  194. for i := 0; i < len(leaks); i++ {
  195. if err := encoder.Encode(leaks[i]); err != nil {
  196. return err
  197. }
  198. // for all but the last leak, seek back and overwrite the newline appended by Encode() with comma & newline
  199. if i+1 < len(leaks) {
  200. if _, err := f.Seek(-1, 1); err != nil {
  201. return err
  202. }
  203. if _, err := f.WriteString(",\n"); err != nil {
  204. return err
  205. }
  206. }
  207. }
  208. if _, err := f.WriteString("]"); err != nil {
  209. return err
  210. }
  211. if err := f.Sync(); err != nil {
  212. log.Error(err)
  213. return err
  214. }
  215. }
  216. return err
  217. }
  218. // cloneRepo clones a repo to memory(default) or to disk if the --disk option is set.
  219. func cloneRepo() (*RepoDescriptor, error) {
  220. var (
  221. err error
  222. repo *git.Repository
  223. )
  224. // check if repo is whitelisted
  225. for _, re := range config.WhiteList.repos {
  226. if re.FindString(opts.Repo) != "" {
  227. return nil, fmt.Errorf("skipping %s, whitelisted", opts.Repo)
  228. }
  229. }
  230. // check if cloning to disk
  231. if opts.Disk {
  232. log.Infof("cloning %s to disk", opts.Repo)
  233. cloneTarget := fmt.Sprintf("%s/%x", dir, md5.Sum([]byte(fmt.Sprintf("%s%s", opts.GithubUser, opts.Repo))))
  234. if strings.HasPrefix(opts.Repo, "git") {
  235. // private
  236. repo, err = git.PlainClone(cloneTarget, false, &git.CloneOptions{
  237. URL: opts.Repo,
  238. Progress: os.Stdout,
  239. Auth: config.sshAuth,
  240. })
  241. } else {
  242. // non-private
  243. repo, err = git.PlainClone(cloneTarget, false, &git.CloneOptions{
  244. URL: opts.Repo,
  245. Progress: os.Stdout,
  246. })
  247. }
  248. } else if opts.RepoPath != "" {
  249. // local repo
  250. log.Infof("opening %s", opts.RepoPath)
  251. repo, err = git.PlainOpen(opts.RepoPath)
  252. } else {
  253. // cloning to memory
  254. log.Infof("cloning %s", opts.Repo)
  255. if strings.HasPrefix(opts.Repo, "git") {
  256. repo, err = git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
  257. URL: opts.Repo,
  258. Progress: os.Stdout,
  259. Auth: config.sshAuth,
  260. })
  261. } else {
  262. repo, err = git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
  263. URL: opts.Repo,
  264. Progress: os.Stdout,
  265. })
  266. }
  267. }
  268. return &RepoDescriptor{
  269. repository: repo,
  270. path: opts.RepoPath,
  271. url: opts.Repo,
  272. name: filepath.Base(opts.Repo),
  273. err: err,
  274. }, nil
  275. }
  276. // auditGitRepo beings an audit on a git repository
  277. func auditGitRepo(repo *RepoDescriptor) ([]Leak, error) {
  278. var (
  279. err error
  280. leaks []Leak
  281. )
  282. for _, re := range config.WhiteList.repos {
  283. if re.FindString(repo.name) != "" {
  284. return leaks, fmt.Errorf("skipping %s, whitelisted", repo.name)
  285. }
  286. }
  287. // check if target contains an external gitleaks toml
  288. if opts.RepoConfig {
  289. err := config.updateFromRepo(repo)
  290. if err != nil {
  291. return leaks, nil
  292. }
  293. }
  294. // clear commit cache
  295. commitMap = make(map[string]bool)
  296. refs, err := repo.repository.Storer.IterReferences()
  297. if err != nil {
  298. return leaks, err
  299. }
  300. err = refs.ForEach(func(ref *plumbing.Reference) error {
  301. if ref.Name().IsTag() {
  302. return nil
  303. }
  304. branchLeaks := auditGitReference(repo, ref)
  305. for _, leak := range branchLeaks {
  306. leaks = append(leaks, leak)
  307. }
  308. return nil
  309. })
  310. return leaks, err
  311. }
  312. // auditGitReference beings the audit for a git reference. This function will
  313. // traverse the git reference and audit each line of each diff.
  314. func auditGitReference(repo *RepoDescriptor, ref *plumbing.Reference) []Leak {
  315. var (
  316. err error
  317. repoName string
  318. leaks []Leak
  319. commitCount int64
  320. commitWg sync.WaitGroup
  321. mutex = &sync.Mutex{}
  322. semaphore chan bool
  323. )
  324. if auditDone {
  325. return nil
  326. }
  327. repoName = repo.name
  328. if opts.Threads != 0 {
  329. threads = opts.Threads
  330. }
  331. if opts.RepoPath != "" {
  332. threads = 1
  333. }
  334. semaphore = make(chan bool, threads)
  335. cIter, err := repo.repository.Log(&git.LogOptions{From: ref.Hash()})
  336. if err != nil {
  337. return nil
  338. }
  339. err = cIter.ForEach(func(c *object.Commit) error {
  340. if c == nil || (opts.Depth != 0 && commitCount == opts.Depth) || auditDone {
  341. if commitCount == opts.Depth {
  342. auditDone = true
  343. }
  344. return storer.ErrStop
  345. }
  346. commitCount = commitCount + 1
  347. if config.WhiteList.commits[c.Hash.String()] {
  348. log.Infof("skipping commit: %s\n", c.Hash.String())
  349. return nil
  350. }
  351. // commits w/o parent (root of git the git ref) or option for single commit is not empty str
  352. if len(c.ParentHashes) == 0 || opts.Commit == c.Hash.String() {
  353. if commitMap[c.Hash.String()] {
  354. return nil
  355. }
  356. if opts.Commit == c.Hash.String() {
  357. auditDone = true
  358. }
  359. cMutex.Lock()
  360. commitMap[c.Hash.String()] = true
  361. cMutex.Unlock()
  362. totalCommits = totalCommits + 1
  363. fIter, err := c.Files()
  364. if err != nil {
  365. return nil
  366. }
  367. err = fIter.ForEach(func(f *object.File) error {
  368. bin, err := f.IsBinary()
  369. if bin || err != nil {
  370. return nil
  371. }
  372. for _, re := range config.WhiteList.files {
  373. if re.FindString(f.Name) != "" {
  374. log.Debugf("skipping whitelisted file (matched regex '%s'): %s", re.String(), f.Name)
  375. return nil
  376. }
  377. }
  378. content, err := f.Contents()
  379. if err != nil {
  380. return nil
  381. }
  382. diff := gitDiff{
  383. repoName: repoName,
  384. filePath: f.Name,
  385. content: content,
  386. sha: c.Hash.String(),
  387. author: c.Author.String(),
  388. message: strings.Replace(c.Message, "\n", " ", -1),
  389. date: c.Author.When,
  390. }
  391. fileLeaks := inspect(diff)
  392. mutex.Lock()
  393. leaks = append(leaks, fileLeaks...)
  394. mutex.Unlock()
  395. return nil
  396. })
  397. return nil
  398. }
  399. // single commit
  400. if opts.Commit != "" {
  401. return nil
  402. }
  403. skipCount := false
  404. err = c.Parents().ForEach(func(parent *object.Commit) error {
  405. // check if we've seen this diff before
  406. if commitMap[c.Hash.String()+parent.Hash.String()] {
  407. return nil
  408. }
  409. cMutex.Lock()
  410. commitMap[c.Hash.String()+parent.Hash.String()] = true
  411. cMutex.Unlock()
  412. if !skipCount {
  413. totalCommits = totalCommits + 1
  414. skipCount = true
  415. }
  416. commitWg.Add(1)
  417. semaphore <- true
  418. go func(c *object.Commit, parent *object.Commit) {
  419. var (
  420. filePath string
  421. skipFile bool
  422. )
  423. defer func() {
  424. commitWg.Done()
  425. <-semaphore
  426. if r := recover(); r != nil {
  427. log.Warnf("recovering from panic on commit %s, likely large diff causing panic", c.Hash.String())
  428. }
  429. }()
  430. patch, err := c.Patch(parent)
  431. if err != nil {
  432. log.Warnf("problem generating patch for commit: %s\n", c.Hash.String())
  433. return
  434. }
  435. for _, f := range patch.FilePatches() {
  436. if f.IsBinary() {
  437. continue
  438. }
  439. skipFile = false
  440. from, to := f.Files()
  441. filePath = "???"
  442. if from != nil {
  443. filePath = from.Path()
  444. } else if to != nil {
  445. filePath = to.Path()
  446. }
  447. for _, re := range config.WhiteList.files {
  448. if re.FindString(filePath) != "" {
  449. log.Debugf("skipping whitelisted file (matched regex '%s'): %s", re.String(), filePath)
  450. skipFile = true
  451. break
  452. }
  453. }
  454. if skipFile {
  455. continue
  456. }
  457. chunks := f.Chunks()
  458. for _, chunk := range chunks {
  459. if chunk.Type() == diffType.Add || chunk.Type() == diffType.Delete {
  460. diff := gitDiff{
  461. repoName: repoName,
  462. filePath: filePath,
  463. content: chunk.Content(),
  464. sha: c.Hash.String(),
  465. author: c.Author.String(),
  466. message: strings.Replace(c.Message, "\n", " ", -1),
  467. date: c.Author.When,
  468. }
  469. chunkLeaks := inspect(diff)
  470. for _, leak := range chunkLeaks {
  471. mutex.Lock()
  472. leaks = append(leaks, leak)
  473. mutex.Unlock()
  474. }
  475. }
  476. }
  477. }
  478. }(c, parent)
  479. return nil
  480. })
  481. // stop audit if we are at commitStop
  482. if c.Hash.String() == opts.CommitStop {
  483. auditDone = true
  484. return storer.ErrStop
  485. }
  486. return nil
  487. })
  488. commitWg.Wait()
  489. return leaks
  490. }
  491. // inspect will parse each line of the git diff's content against a set of regexes or
  492. // a set of regexes set by the config (see gitleaks.toml for example). This function
  493. // will skip lines that include a whitelisted regex. A list of leaks is returned.
  494. // If verbose mode (-v/--verbose) is set, then checkDiff will log leaks as they are discovered.
  495. func inspect(diff gitDiff) []Leak {
  496. var (
  497. leaks []Leak
  498. skipLine bool
  499. )
  500. lines := strings.Split(diff.content, "\n")
  501. for _, line := range lines {
  502. skipLine = false
  503. for _, re := range config.Regexes {
  504. match := re.regex.FindString(line)
  505. if match == "" {
  506. continue
  507. }
  508. if skipLine = isLineWhitelisted(line); skipLine {
  509. break
  510. }
  511. leaks = addLeak(leaks, line, match, re.description, diff)
  512. }
  513. if !skipLine && (opts.Entropy > 0 || len(config.Entropy.entropyRanges) != 0) {
  514. words := strings.Fields(line)
  515. for _, word := range words {
  516. entropy := getShannonEntropy(word)
  517. // Only check entropyRegexes and whiteListRegexes once per line, and only if an entropy leak type
  518. // was found above, since regex checks are expensive.
  519. if !entropyIsHighEnough(entropy) {
  520. continue
  521. }
  522. // If either the line is whitelisted or the line fails the noiseReduction check (when enabled),
  523. // then we can skip checking the rest of the line for high entropy words.
  524. if skipLine = !highEntropyLineIsALeak(line) || isLineWhitelisted(line); skipLine {
  525. break
  526. }
  527. leaks = addLeak(leaks, line, word, fmt.Sprintf("Entropy: %.2f", entropy), diff)
  528. }
  529. }
  530. }
  531. return leaks
  532. }
  533. // isLineWhitelisted returns true iff the line is matched by at least one of the whiteListRegexes.
  534. func isLineWhitelisted(line string) bool {
  535. for _, wRe := range config.WhiteList.regexes {
  536. whitelistMatch := wRe.FindString(line)
  537. if whitelistMatch != "" {
  538. return true
  539. }
  540. }
  541. return false
  542. }
  543. // addLeak is helper for func inspect() to append leaks if found during a diff check.
  544. func addLeak(leaks []Leak, line string, offender string, leakType string, diff gitDiff) []Leak {
  545. leak := Leak{
  546. Line: line,
  547. Commit: diff.sha,
  548. Offender: offender,
  549. Type: leakType,
  550. Author: diff.author,
  551. File: diff.filePath,
  552. Repo: diff.repoName,
  553. Message: diff.message,
  554. Date: diff.date,
  555. }
  556. if opts.Redact {
  557. leak.Offender = "REDACTED"
  558. leak.Line = strings.Replace(line, offender, "REDACTED", -1)
  559. }
  560. if opts.Verbose {
  561. leak.log()
  562. }
  563. leaks = append(leaks, leak)
  564. return leaks
  565. }
  566. // discoverRepos walks all the children of `path`. If a child directory
  567. // contain a .git file then that repo will be added to the list of repos returned
  568. func discoverRepos(ownerPath string) ([]*RepoDescriptor, error) {
  569. var (
  570. err error
  571. repos []*RepoDescriptor
  572. )
  573. files, err := ioutil.ReadDir(ownerPath)
  574. if err != nil {
  575. return repos, err
  576. }
  577. for _, f := range files {
  578. if f.IsDir() {
  579. repoPath := path.Join(ownerPath, f.Name())
  580. r, err := git.PlainOpen(repoPath)
  581. if err != nil {
  582. continue
  583. }
  584. repos = append(repos, &RepoDescriptor{
  585. repository: r,
  586. name: f.Name(),
  587. path: repoPath,
  588. })
  589. }
  590. }
  591. return repos, err
  592. }
  593. func (leak Leak) log() {
  594. b, _ := json.MarshalIndent(leak, "", " ")
  595. fmt.Println(string(b))
  596. }