main.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. package main
  2. import (
  3. "crypto/md5"
  4. "encoding/csv"
  5. "encoding/json"
  6. "fmt"
  7. "io/ioutil"
  8. "os"
  9. "path"
  10. "path/filepath"
  11. "regexp"
  12. "strings"
  13. "sync"
  14. "time"
  15. "gopkg.in/src-d/go-git.v4/plumbing"
  16. "github.com/google/go-github/github"
  17. "github.com/hako/durafmt"
  18. log "github.com/sirupsen/logrus"
  19. "gopkg.in/src-d/go-git.v4"
  20. diffType "gopkg.in/src-d/go-git.v4/plumbing/format/diff"
  21. "gopkg.in/src-d/go-git.v4/plumbing/object"
  22. "gopkg.in/src-d/go-git.v4/plumbing/storer"
  23. "gopkg.in/src-d/go-git.v4/storage/memory"
  24. )
  25. // Leak represents a leaked secret or regex match.
  26. type Leak struct {
  27. Line string `json:"line"`
  28. Commit string `json:"commit"`
  29. Offender string `json:"offender"`
  30. Type string `json:"reason"`
  31. Message string `json:"commitMsg"`
  32. Author string `json:"author"`
  33. File string `json:"file"`
  34. Repo string `json:"repo"`
  35. Date time.Time `json:"date"`
  36. }
  37. // RepoDescriptor contains a src-d git repository and other data about the repo
  38. type RepoDescriptor struct {
  39. path string
  40. url string
  41. name string
  42. repository *git.Repository
  43. err error
  44. }
  45. type gitDiff struct {
  46. content string
  47. commit *object.Commit
  48. filePath string
  49. repoName string
  50. githubCommit *github.RepositoryCommit
  51. sha string
  52. message string
  53. author string
  54. date time.Time
  55. }
  56. const defaultGithubURL = "https://api.github.com/"
  57. const version = "1.24.0"
  58. const errExit = 2
  59. const leakExit = 1
  60. var (
  61. opts *Options
  62. config *Config
  63. singleSearchRegex *regexp.Regexp
  64. dir string
  65. threads int
  66. totalCommits int64
  67. commitMap = make(map[string]bool)
  68. cMutex = &sync.Mutex{}
  69. auditDone bool
  70. )
  71. func init() {
  72. log.SetOutput(os.Stdout)
  73. // threads = runtime.GOMAXPROCS(0) / 2
  74. threads = 1
  75. }
  76. func main() {
  77. var err error
  78. opts, err = setupOpts()
  79. if err != nil {
  80. log.Fatal(err)
  81. }
  82. config, err = newConfig()
  83. if err != nil {
  84. log.Fatal(err)
  85. }
  86. now := time.Now()
  87. leaks, err := run()
  88. if err != nil {
  89. if strings.Contains(err.Error(), "whitelisted") {
  90. log.Info(err.Error())
  91. os.Exit(0)
  92. }
  93. log.Error(err)
  94. os.Exit(errExit)
  95. }
  96. if opts.Report != "" {
  97. writeReport(leaks)
  98. }
  99. if len(leaks) != 0 {
  100. log.Warnf("%d leaks detected. %d commits inspected in %s", len(leaks), totalCommits, durafmt.Parse(time.Now().Sub(now)).String())
  101. os.Exit(leakExit)
  102. } else {
  103. log.Infof("%d leaks detected. %d commits inspected in %s", len(leaks), totalCommits, durafmt.Parse(time.Now().Sub(now)).String())
  104. }
  105. }
  106. // run parses options and kicks off the audit
  107. func run() ([]Leak, error) {
  108. var (
  109. leaks []Leak
  110. err error
  111. )
  112. if opts.Disk {
  113. // temporary directory where all the gitleaks plain clones will reside
  114. dir, err = ioutil.TempDir("", "gitleaks")
  115. defer os.RemoveAll(dir)
  116. if err != nil {
  117. return nil, err
  118. }
  119. }
  120. // start audits
  121. if opts.Repo != "" || opts.RepoPath != "" {
  122. // Audit a single remote repo or a local repo.
  123. repo, err := cloneRepo()
  124. if err != nil {
  125. return leaks, err
  126. }
  127. return auditGitRepo(repo)
  128. } else if opts.OwnerPath != "" {
  129. // Audit local repos. Gitleaks will look for all child directories of OwnerPath for
  130. // git repos and perform an audit on said repos.
  131. repos, err := discoverRepos(opts.OwnerPath)
  132. if err != nil {
  133. return leaks, err
  134. }
  135. for _, repo := range repos {
  136. leaksFromRepo, err := auditGitRepo(repo)
  137. if err != nil {
  138. return leaks, err
  139. }
  140. leaks = append(leaksFromRepo, leaks...)
  141. }
  142. } else if opts.GithubOrg != "" || opts.GithubUser != "" {
  143. // Audit a github owner -- a user or organization.
  144. leaks, err = auditGithubRepos()
  145. if err != nil {
  146. return leaks, err
  147. }
  148. } else if opts.GitLabOrg != "" || opts.GitLabUser != "" {
  149. leaks, err = auditGitlabRepos()
  150. if err != nil {
  151. return leaks, err
  152. }
  153. } else if opts.GithubPR != "" {
  154. return auditGithubPR()
  155. }
  156. return leaks, nil
  157. }
  158. // writeReport writes a report to a file specified in the --report= option.
  159. // Default format for report is JSON. You can use the --csv option to write the report as a csv
  160. func writeReport(leaks []Leak) error {
  161. var err error
  162. if len(leaks) == 0 {
  163. return nil
  164. }
  165. log.Infof("writing report to %s", opts.Report)
  166. if strings.HasSuffix(opts.Report, ".csv") {
  167. f, err := os.Create(opts.Report)
  168. if err != nil {
  169. return err
  170. }
  171. defer f.Close()
  172. w := csv.NewWriter(f)
  173. w.Write([]string{"repo", "line", "commit", "offender", "reason", "commitMsg", "author", "file", "date"})
  174. for _, leak := range leaks {
  175. w.Write([]string{leak.Repo, leak.Line, leak.Commit, leak.Offender, leak.Type, leak.Message, leak.Author, leak.File, leak.Date.Format(time.RFC3339)})
  176. }
  177. w.Flush()
  178. } else {
  179. var (
  180. f *os.File
  181. encoder *json.Encoder
  182. )
  183. f, err := os.Create(opts.Report)
  184. if err != nil {
  185. return err
  186. }
  187. defer f.Close()
  188. encoder = json.NewEncoder(f)
  189. encoder.SetIndent("", "\t")
  190. if _, err := f.WriteString("[\n"); err != nil {
  191. return err
  192. }
  193. for i := 0; i < len(leaks); i++ {
  194. if err := encoder.Encode(leaks[i]); err != nil {
  195. return err
  196. }
  197. // for all but the last leak, seek back and overwrite the newline appended by Encode() with comma & newline
  198. if i+1 < len(leaks) {
  199. if _, err := f.Seek(-1, 1); err != nil {
  200. return err
  201. }
  202. if _, err := f.WriteString(",\n"); err != nil {
  203. return err
  204. }
  205. }
  206. }
  207. if _, err := f.WriteString("]"); err != nil {
  208. return err
  209. }
  210. if err := f.Sync(); err != nil {
  211. log.Error(err)
  212. return err
  213. }
  214. }
  215. return err
  216. }
  217. // cloneRepo clones a repo to memory(default) or to disk if the --disk option is set.
  218. func cloneRepo() (*RepoDescriptor, error) {
  219. var (
  220. err error
  221. repo *git.Repository
  222. )
  223. // check if repo is whitelisted
  224. for _, re := range config.WhiteList.repos {
  225. if re.FindString(opts.Repo) != "" {
  226. return nil, fmt.Errorf("skipping %s, whitelisted", opts.Repo)
  227. }
  228. }
  229. // check if cloning to disk
  230. if opts.Disk {
  231. log.Infof("cloning %s to disk", opts.Repo)
  232. cloneTarget := fmt.Sprintf("%s/%x", dir, md5.Sum([]byte(fmt.Sprintf("%s%s", opts.GithubUser, opts.Repo))))
  233. if strings.HasPrefix(opts.Repo, "git") {
  234. // private
  235. repo, err = git.PlainClone(cloneTarget, false, &git.CloneOptions{
  236. URL: opts.Repo,
  237. Progress: os.Stdout,
  238. Auth: config.sshAuth,
  239. })
  240. } else {
  241. // non-private
  242. repo, err = git.PlainClone(cloneTarget, false, &git.CloneOptions{
  243. URL: opts.Repo,
  244. Progress: os.Stdout,
  245. })
  246. }
  247. } else if opts.RepoPath != "" {
  248. // local repo
  249. log.Infof("opening %s", opts.RepoPath)
  250. repo, err = git.PlainOpen(opts.RepoPath)
  251. } else {
  252. // cloning to memory
  253. log.Infof("cloning %s", opts.Repo)
  254. if strings.HasPrefix(opts.Repo, "git") {
  255. repo, err = git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
  256. URL: opts.Repo,
  257. Progress: os.Stdout,
  258. Auth: config.sshAuth,
  259. })
  260. } else {
  261. repo, err = git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
  262. URL: opts.Repo,
  263. Progress: os.Stdout,
  264. })
  265. }
  266. }
  267. return &RepoDescriptor{
  268. repository: repo,
  269. path: opts.RepoPath,
  270. url: opts.Repo,
  271. name: filepath.Base(opts.Repo),
  272. err: err,
  273. }, nil
  274. }
  275. // auditGitRepo beings an audit on a git repository
  276. func auditGitRepo(repo *RepoDescriptor) ([]Leak, error) {
  277. var (
  278. err error
  279. leaks []Leak
  280. )
  281. for _, re := range config.WhiteList.repos {
  282. if re.FindString(repo.name) != "" {
  283. return leaks, fmt.Errorf("skipping %s, whitelisted", repo.name)
  284. }
  285. }
  286. // check if target contains an external gitleaks toml
  287. if opts.RepoConfig {
  288. err := config.updateFromRepo(repo)
  289. if err != nil {
  290. return leaks, nil
  291. }
  292. }
  293. // clear commit cache
  294. commitMap = make(map[string]bool)
  295. refs, err := repo.repository.Storer.IterReferences()
  296. if err != nil {
  297. return leaks, err
  298. }
  299. err = refs.ForEach(func(ref *plumbing.Reference) error {
  300. if ref.Name().IsTag() {
  301. return nil
  302. }
  303. branchLeaks := auditGitReference(repo, ref)
  304. for _, leak := range branchLeaks {
  305. leaks = append(leaks, leak)
  306. }
  307. return nil
  308. })
  309. return leaks, err
  310. }
  311. // auditGitReference beings the audit for a git reference. This function will
  312. // traverse the git reference and audit each line of each diff.
  313. func auditGitReference(repo *RepoDescriptor, ref *plumbing.Reference) []Leak {
  314. var (
  315. err error
  316. repoName string
  317. leaks []Leak
  318. commitCount int64
  319. commitWg sync.WaitGroup
  320. mutex = &sync.Mutex{}
  321. semaphore chan bool
  322. )
  323. if auditDone {
  324. return nil
  325. }
  326. repoName = repo.name
  327. if opts.Threads != 0 {
  328. threads = opts.Threads
  329. }
  330. if opts.RepoPath != "" {
  331. threads = 1
  332. }
  333. semaphore = make(chan bool, threads)
  334. cIter, err := repo.repository.Log(&git.LogOptions{From: ref.Hash()})
  335. if err != nil {
  336. return nil
  337. }
  338. err = cIter.ForEach(func(c *object.Commit) error {
  339. if c == nil || (opts.Depth != 0 && commitCount == opts.Depth) || auditDone {
  340. if commitCount == opts.Depth {
  341. auditDone = true
  342. }
  343. return storer.ErrStop
  344. }
  345. commitCount = commitCount + 1
  346. if config.WhiteList.commits[c.Hash.String()] {
  347. log.Infof("skipping commit: %s\n", c.Hash.String())
  348. return nil
  349. }
  350. // commits w/o parent (root of git the git ref) or option for single commit is not empty str
  351. if len(c.ParentHashes) == 0 || opts.Commit == c.Hash.String() {
  352. if commitMap[c.Hash.String()] {
  353. return nil
  354. }
  355. if opts.Commit == c.Hash.String() {
  356. auditDone = true
  357. }
  358. cMutex.Lock()
  359. commitMap[c.Hash.String()] = true
  360. cMutex.Unlock()
  361. totalCommits = totalCommits + 1
  362. fIter, err := c.Files()
  363. if err != nil {
  364. return nil
  365. }
  366. err = fIter.ForEach(func(f *object.File) error {
  367. bin, err := f.IsBinary()
  368. if bin || err != nil {
  369. return nil
  370. }
  371. for _, re := range config.WhiteList.files {
  372. if re.FindString(f.Name) != "" {
  373. log.Debugf("skipping whitelisted file (matched regex '%s'): %s", re.String(), f.Name)
  374. return nil
  375. }
  376. }
  377. content, err := f.Contents()
  378. if err != nil {
  379. return nil
  380. }
  381. diff := gitDiff{
  382. repoName: repoName,
  383. filePath: f.Name,
  384. content: content,
  385. sha: c.Hash.String(),
  386. author: c.Author.String(),
  387. message: strings.Replace(c.Message, "\n", " ", -1),
  388. date: c.Author.When,
  389. }
  390. fileLeaks := inspect(diff)
  391. mutex.Lock()
  392. leaks = append(leaks, fileLeaks...)
  393. mutex.Unlock()
  394. return nil
  395. })
  396. return nil
  397. }
  398. // single commit
  399. if opts.Commit != "" {
  400. return nil
  401. }
  402. skipCount := false
  403. err = c.Parents().ForEach(func(parent *object.Commit) error {
  404. // check if we've seen this diff before
  405. if commitMap[c.Hash.String()+parent.Hash.String()] {
  406. return nil
  407. }
  408. cMutex.Lock()
  409. commitMap[c.Hash.String()+parent.Hash.String()] = true
  410. cMutex.Unlock()
  411. if !skipCount {
  412. totalCommits = totalCommits + 1
  413. skipCount = true
  414. }
  415. commitWg.Add(1)
  416. semaphore <- true
  417. go func(c *object.Commit, parent *object.Commit) {
  418. var (
  419. filePath string
  420. skipFile bool
  421. )
  422. defer func() {
  423. commitWg.Done()
  424. <-semaphore
  425. if r := recover(); r != nil {
  426. log.Warnf("recovering from panic on commit %s, likely large diff causing panic", c.Hash.String())
  427. }
  428. }()
  429. patch, err := c.Patch(parent)
  430. if err != nil {
  431. log.Warnf("problem generating patch for commit: %s\n", c.Hash.String())
  432. return
  433. }
  434. for _, f := range patch.FilePatches() {
  435. if f.IsBinary() {
  436. continue
  437. }
  438. skipFile = false
  439. from, to := f.Files()
  440. filePath = "???"
  441. if from != nil {
  442. filePath = from.Path()
  443. } else if to != nil {
  444. filePath = to.Path()
  445. }
  446. for _, re := range config.WhiteList.files {
  447. if re.FindString(filePath) != "" {
  448. log.Debugf("skipping whitelisted file (matched regex '%s'): %s", re.String(), filePath)
  449. skipFile = true
  450. break
  451. }
  452. }
  453. if skipFile {
  454. continue
  455. }
  456. chunks := f.Chunks()
  457. for _, chunk := range chunks {
  458. if chunk.Type() == diffType.Add || chunk.Type() == diffType.Delete {
  459. diff := gitDiff{
  460. repoName: repoName,
  461. filePath: filePath,
  462. content: chunk.Content(),
  463. sha: c.Hash.String(),
  464. author: c.Author.String(),
  465. message: strings.Replace(c.Message, "\n", " ", -1),
  466. date: c.Author.When,
  467. }
  468. chunkLeaks := inspect(diff)
  469. for _, leak := range chunkLeaks {
  470. mutex.Lock()
  471. leaks = append(leaks, leak)
  472. mutex.Unlock()
  473. }
  474. }
  475. }
  476. }
  477. }(c, parent)
  478. return nil
  479. })
  480. // stop audit if we are at commitStop
  481. if c.Hash.String() == opts.CommitStop {
  482. auditDone = true
  483. return storer.ErrStop
  484. }
  485. return nil
  486. })
  487. commitWg.Wait()
  488. return leaks
  489. }
  490. // inspect will parse each line of the git diff's content against a set of regexes or
  491. // a set of regexes set by the config (see gitleaks.toml for example). This function
  492. // will skip lines that include a whitelisted regex. A list of leaks is returned.
  493. // If verbose mode (-v/--verbose) is set, then checkDiff will log leaks as they are discovered.
  494. func inspect(diff gitDiff) []Leak {
  495. var (
  496. leaks []Leak
  497. skipLine bool
  498. )
  499. lines := strings.Split(diff.content, "\n")
  500. for _, line := range lines {
  501. skipLine = false
  502. for _, re := range config.Regexes {
  503. match := re.regex.FindString(line)
  504. if match == "" {
  505. continue
  506. }
  507. if skipLine = isLineWhitelisted(line); skipLine {
  508. break
  509. }
  510. leaks = addLeak(leaks, line, match, re.description, diff)
  511. }
  512. if !skipLine && (opts.Entropy > 0 || len(config.Entropy.entropyRanges) != 0) {
  513. words := strings.Fields(line)
  514. for _, word := range words {
  515. entropy := getShannonEntropy(word)
  516. // Only check entropyRegexes and whiteListRegexes once per line, and only if an entropy leak type
  517. // was found above, since regex checks are expensive.
  518. if !entropyIsHighEnough(entropy) {
  519. continue
  520. }
  521. // If either the line is whitelisted or the line fails the noiseReduction check (when enabled),
  522. // then we can skip checking the rest of the line for high entropy words.
  523. if skipLine = !highEntropyLineIsALeak(line) || isLineWhitelisted(line); skipLine {
  524. break
  525. }
  526. leaks = addLeak(leaks, line, word, fmt.Sprintf("Entropy: %.2f", entropy), diff)
  527. }
  528. }
  529. }
  530. return leaks
  531. }
  532. // isLineWhitelisted returns true iff the line is matched by at least one of the whiteListRegexes.
  533. func isLineWhitelisted(line string) bool {
  534. for _, wRe := range config.WhiteList.regexes {
  535. whitelistMatch := wRe.FindString(line)
  536. if whitelistMatch != "" {
  537. return true
  538. }
  539. }
  540. return false
  541. }
  542. // addLeak is helper for func inspect() to append leaks if found during a diff check.
  543. func addLeak(leaks []Leak, line string, offender string, leakType string, diff gitDiff) []Leak {
  544. leak := Leak{
  545. Line: line,
  546. Commit: diff.sha,
  547. Offender: offender,
  548. Type: leakType,
  549. Author: diff.author,
  550. File: diff.filePath,
  551. Repo: diff.repoName,
  552. Message: diff.message,
  553. Date: diff.date,
  554. }
  555. if opts.Redact {
  556. leak.Offender = "REDACTED"
  557. leak.Line = strings.Replace(line, offender, "REDACTED", -1)
  558. }
  559. if opts.Verbose {
  560. leak.log()
  561. }
  562. leaks = append(leaks, leak)
  563. return leaks
  564. }
  565. // discoverRepos walks all the children of `path`. If a child directory
  566. // contain a .git file then that repo will be added to the list of repos returned
  567. func discoverRepos(ownerPath string) ([]*RepoDescriptor, error) {
  568. var (
  569. err error
  570. repos []*RepoDescriptor
  571. )
  572. files, err := ioutil.ReadDir(ownerPath)
  573. if err != nil {
  574. return repos, err
  575. }
  576. for _, f := range files {
  577. if f.IsDir() {
  578. repoPath := path.Join(ownerPath, f.Name())
  579. r, err := git.PlainOpen(repoPath)
  580. if err != nil {
  581. continue
  582. }
  583. repos = append(repos, &RepoDescriptor{
  584. repository: r,
  585. name: f.Name(),
  586. path: repoPath,
  587. })
  588. }
  589. }
  590. return repos, err
  591. }
  592. func (leak Leak) log() {
  593. b, _ := json.MarshalIndent(leak, "", " ")
  594. fmt.Println(string(b))
  595. }