utils.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510
  1. package scan
  2. import (
  3. "bufio"
  4. "encoding/json"
  5. "fmt"
  6. "math"
  7. "os"
  8. "path/filepath"
  9. "regexp"
  10. "runtime"
  11. "strconv"
  12. "strings"
  13. "time"
  14. "github.com/zricethezav/gitleaks/v7/report"
  15. "github.com/zricethezav/gitleaks/v7/config"
  16. "github.com/zricethezav/gitleaks/v7/options"
  17. "github.com/go-git/go-git/v5"
  18. "github.com/go-git/go-git/v5/plumbing"
  19. "github.com/go-git/go-git/v5/plumbing/object"
  20. "github.com/go-git/go-git/v5/storage/memory"
  21. log "github.com/sirupsen/logrus"
  22. )
  23. const (
  24. diffAddPrefix = "+"
  25. diffDelPrefix = "-"
  26. diffLineSignature = " @@"
  27. defaultLineNumber = 1
  28. maxLineLen = 200
  29. )
  30. func obtainCommit(repo *git.Repository, commitSha string) (*object.Commit, error) {
  31. if commitSha == "latest" {
  32. ref, err := repo.Head()
  33. if err != nil {
  34. return nil, err
  35. }
  36. commitSha = ref.Hash().String()
  37. }
  38. return repo.CommitObject(plumbing.NewHash(commitSha))
  39. }
  40. func getRepoName(opts options.Options) string {
  41. if opts.RepoURL != "" {
  42. return filepath.Base(opts.RepoURL)
  43. }
  44. if opts.Path != "" {
  45. return filepath.Base(opts.Path)
  46. }
  47. if opts.CheckUncommitted() {
  48. dir, _ := os.Getwd()
  49. return filepath.Base(dir)
  50. }
  51. return ""
  52. }
  53. func getRepo(opts options.Options) (*git.Repository, error) {
  54. if opts.OpenLocal() {
  55. if opts.Path != "" {
  56. log.Infof("opening %s\n", opts.Path)
  57. } else {
  58. log.Info("opening .")
  59. }
  60. return git.PlainOpen(opts.Path)
  61. }
  62. if opts.CheckUncommitted() {
  63. // open git repo from PWD
  64. dir, err := os.Getwd()
  65. if err != nil {
  66. return nil, err
  67. }
  68. log.Debugf("opening %s as a repo\n", dir)
  69. return git.PlainOpen(dir)
  70. }
  71. return cloneRepo(opts)
  72. }
  73. func cloneRepo(opts options.Options) (*git.Repository, error) {
  74. cloneOpts, err := opts.CloneOptions()
  75. if err != nil {
  76. return nil, err
  77. }
  78. if opts.ClonePath != "" {
  79. log.Infof("cloning... %s to %s", cloneOpts.URL, opts.ClonePath)
  80. return git.PlainClone(opts.ClonePath, false, cloneOpts)
  81. }
  82. log.Infof("cloning... %s", cloneOpts.URL)
  83. return git.Clone(memory.NewStorage(), nil, cloneOpts)
  84. }
  85. // depthReached checks if i meets the depth (--depth=) if set
  86. func depthReached(i int, opts options.Options) bool {
  87. if opts.Depth != 0 && opts.Depth == i {
  88. log.Warnf("Exceeded depth limit (%d)", i)
  89. return true
  90. }
  91. return false
  92. }
  93. // emptyCommit generates an empty commit used for scanning uncommitted changes
  94. func emptyCommit() *object.Commit {
  95. return &object.Commit{
  96. Hash: plumbing.Hash{},
  97. Message: "",
  98. Author: object.Signature{
  99. Name: "",
  100. Email: "",
  101. When: time.Unix(0, 0).UTC(),
  102. },
  103. }
  104. }
  105. // howManyThreads will return a number 1-GOMAXPROCS which is the number
  106. // of goroutines that will spawn during gitleaks execution
  107. func howManyThreads(threads int) int {
  108. maxThreads := runtime.GOMAXPROCS(0)
  109. if threads == 0 {
  110. return 1
  111. } else if threads > maxThreads {
  112. log.Warnf("%d threads set too high, setting to system max, %d", threads, maxThreads)
  113. return maxThreads
  114. }
  115. return threads
  116. }
  117. func shouldLog(scanner BaseScanner) bool {
  118. if scanner.opts.Verbose && scanner.scannerType != typeRepoScanner &&
  119. scanner.scannerType != typeCommitScanner &&
  120. scanner.scannerType != typeUnstagedScanner &&
  121. scanner.scannerType != typeNoGitScanner {
  122. return true
  123. }
  124. return false
  125. }
  126. func checkRules(scanner BaseScanner, commit *object.Commit, repoName, filePath, content string) []report.Leak {
  127. filename := filepath.Base(filePath)
  128. path := filepath.Dir(filePath)
  129. var leaks []report.Leak
  130. skipRuleLookup := make(map[string]bool)
  131. // First do simple rule checks based on filename
  132. if skipCheck(scanner.cfg, filename, path) {
  133. return leaks
  134. }
  135. for _, rule := range scanner.cfg.Rules {
  136. if isCommitAllowListed(commit.Hash.String(), rule.AllowList.Commits) {
  137. continue
  138. }
  139. if skipRule(rule, filename, filePath, commit.Hash.String()) {
  140. skipRuleLookup[rule.Description] = true
  141. continue
  142. }
  143. // If it doesnt contain a Content regex then it is a filename regex match
  144. if !ruleContainRegex(rule) {
  145. leak := report.Leak{
  146. LineNumber: defaultLineNumber,
  147. Line: "",
  148. Offender: limitLen("Filename/path offender: " + filename),
  149. Commit: commit.Hash.String(),
  150. Repo: repoName,
  151. RepoURL: scanner.opts.RepoURL,
  152. Message: limitLen(commit.Message),
  153. Rule: rule.Description,
  154. Author: commit.Author.Name,
  155. Email: commit.Author.Email,
  156. Date: commit.Author.When,
  157. Tags: strings.Join(rule.Tags, ", "),
  158. File: filePath,
  159. // Operation: diffOpToString(bundle.Operation),
  160. }
  161. leak.LeakURL = leakURL(leak)
  162. if shouldLog(scanner) {
  163. logLeak(leak, scanner.opts.Redact)
  164. }
  165. leaks = append(leaks, leak)
  166. }
  167. }
  168. lineNumber := 1
  169. for _, line := range strings.Split(content, "\n") {
  170. for _, rule := range scanner.cfg.Rules {
  171. if isCommitAllowListed(commit.Hash.String(), rule.AllowList.Commits) {
  172. break
  173. }
  174. if _, ok := skipRuleLookup[rule.Description]; ok {
  175. continue
  176. }
  177. offender := rule.Regex.FindString(line)
  178. if offender == "" {
  179. continue
  180. }
  181. // check entropy
  182. groups := rule.Regex.FindStringSubmatch(offender)
  183. if isAllowListed(line, append(rule.AllowList.Regexes, scanner.cfg.Allowlist.Regexes...)) {
  184. continue
  185. }
  186. if len(rule.Entropies) != 0 && !trippedEntropy(groups, rule) {
  187. continue
  188. }
  189. // 0 is a match for the full regex pattern
  190. if 0 < rule.ReportGroup && rule.ReportGroup < len(groups) {
  191. offender = groups[rule.ReportGroup]
  192. }
  193. leak := report.Leak{
  194. LineNumber: lineNumber,
  195. Line: limitLen(line),
  196. Offender: limitLen(offender),
  197. Commit: commit.Hash.String(),
  198. Repo: repoName,
  199. RepoURL: scanner.opts.RepoURL,
  200. Message: limitLen(commit.Message),
  201. Rule: rule.Description,
  202. Author: commit.Author.Name,
  203. Email: commit.Author.Email,
  204. Date: commit.Author.When,
  205. Tags: strings.Join(rule.Tags, ", "),
  206. File: filePath,
  207. }
  208. leak.LeakURL = leakURL(leak)
  209. if shouldLog(scanner) {
  210. logLeak(leak, scanner.opts.Redact)
  211. }
  212. leaks = append(leaks, leak)
  213. }
  214. lineNumber++
  215. }
  216. return leaks
  217. }
  218. func limitLen(str string) string {
  219. if len(str) > 200 {
  220. return str[0:maxLineLen-1] + "..."
  221. }
  222. return str
  223. }
  224. func logLeak(leak report.Leak, redact bool) {
  225. if redact {
  226. leak = report.RedactLeak(leak)
  227. }
  228. var b []byte
  229. b, _ = json.MarshalIndent(leak, "", " ")
  230. fmt.Println(string(b))
  231. }
  232. // getLogOptions determines what log options are used when iterating through commits.
  233. // It is similar to `git log {branch}`. Default behavior is to log ALL branches so
  234. // gitleaks gets the full git history.
  235. func logOptions(repo *git.Repository, opts options.Options) (*git.LogOptions, error) {
  236. var logOpts git.LogOptions
  237. const dateformat string = "2006-01-02"
  238. const timeformat string = "2006-01-02T15:04:05-0700"
  239. if opts.CommitFrom != "" {
  240. logOpts.From = plumbing.NewHash(opts.CommitFrom)
  241. }
  242. if opts.CommitSince != "" {
  243. if t, err := time.Parse(timeformat, opts.CommitSince); err == nil {
  244. logOpts.Since = &t
  245. } else if t, err := time.Parse(dateformat, opts.CommitSince); err == nil {
  246. logOpts.Since = &t
  247. } else {
  248. return nil, err
  249. }
  250. logOpts.All = true
  251. }
  252. if opts.CommitUntil != "" {
  253. if t, err := time.Parse(timeformat, opts.CommitUntil); err == nil {
  254. logOpts.Until = &t
  255. } else if t, err := time.Parse(dateformat, opts.CommitUntil); err == nil {
  256. logOpts.Until = &t
  257. } else {
  258. return nil, err
  259. }
  260. logOpts.All = true
  261. }
  262. if opts.Branch != "" {
  263. ref, err := repo.Storer.Reference(plumbing.NewBranchReferenceName(opts.Branch))
  264. if err != nil {
  265. return nil, fmt.Errorf("could not find branch %s", opts.Branch)
  266. }
  267. logOpts = git.LogOptions{
  268. From: ref.Hash(),
  269. }
  270. if logOpts.From.IsZero() {
  271. return nil, fmt.Errorf("could not find branch %s", opts.Branch)
  272. }
  273. return &logOpts, nil
  274. }
  275. if !logOpts.From.IsZero() || logOpts.Since != nil || logOpts.Until != nil {
  276. return &logOpts, nil
  277. }
  278. return &git.LogOptions{All: true}, nil
  279. }
  280. func skipCheck(cfg config.Config, filename string, path string) bool {
  281. // We want to check if there is a allowlist for this file
  282. if len(cfg.Allowlist.Files) != 0 {
  283. for _, reFileName := range cfg.Allowlist.Files {
  284. if regexMatched(filename, reFileName) {
  285. log.Debugf("allowlisted file found, skipping scan of file: %s", filename)
  286. return true
  287. }
  288. }
  289. }
  290. // We want to check if there is a allowlist for this path
  291. if len(cfg.Allowlist.Paths) != 0 {
  292. for _, reFilePath := range cfg.Allowlist.Paths {
  293. if regexMatched(path, reFilePath) {
  294. log.Debugf("file in allowlisted path found, skipping scan of file: %s", filename)
  295. return true
  296. }
  297. }
  298. }
  299. return false
  300. }
  301. func skipRule(rule config.Rule, filename, path, commitSha string) bool {
  302. // For each rule we want to check filename allowlists
  303. if isAllowListed(filename, rule.AllowList.Files) || isAllowListed(path, rule.AllowList.Paths) {
  304. return true
  305. }
  306. // If it has fileNameRegex and it doesnt match we continue to next rule
  307. if ruleContainFileRegex(rule) && !regexMatched(filename, rule.File) {
  308. return true
  309. }
  310. // If it has filePathRegex and it doesnt match we continue to next rule
  311. if ruleContainPathRegex(rule) && !regexMatched(path, rule.Path) {
  312. return true
  313. }
  314. return false
  315. }
  316. // regexMatched matched an interface to a regular expression. The interface f can
  317. // be a string type or go-git *object.File type.
  318. func regexMatched(f string, re *regexp.Regexp) bool {
  319. if re == nil {
  320. return false
  321. }
  322. if re.FindString(f) != "" {
  323. return true
  324. }
  325. return false
  326. }
  327. // trippedEntropy checks if a given capture group or offender falls in between entropy ranges
  328. // supplied by a custom gitleaks configuration. Gitleaks do not check entropy by default.
  329. func trippedEntropy(groups []string, rule config.Rule) bool {
  330. for _, e := range rule.Entropies {
  331. if len(groups) > e.Group {
  332. entropy := shannonEntropy(groups[e.Group])
  333. if entropy >= e.Min && entropy <= e.Max {
  334. return true
  335. }
  336. }
  337. }
  338. return false
  339. }
  340. // shannonEntropy calculates the entropy of data using the formula defined here:
  341. // https://en.wiktionary.org/wiki/Shannon_entropy
  342. // Another way to think about what this is doing is calculating the number of bits
  343. // needed to on average encode the data. So, the higher the entropy, the more random the data, the
  344. // more bits needed to encode that data.
  345. func shannonEntropy(data string) (entropy float64) {
  346. if data == "" {
  347. return 0
  348. }
  349. charCounts := make(map[rune]int)
  350. for _, char := range data {
  351. charCounts[char]++
  352. }
  353. invLength := 1.0 / float64(len(data))
  354. for _, count := range charCounts {
  355. freq := float64(count) * invLength
  356. entropy -= freq * math.Log2(freq)
  357. }
  358. return entropy
  359. }
  360. // Checks if the given rule has a regex
  361. func ruleContainRegex(rule config.Rule) bool {
  362. if rule.Regex == nil {
  363. return false
  364. }
  365. if rule.Regex.String() == "" {
  366. return false
  367. }
  368. return true
  369. }
  370. // Checks if the given rule has a file name regex
  371. func ruleContainFileRegex(rule config.Rule) bool {
  372. if rule.File == nil {
  373. return false
  374. }
  375. if rule.File.String() == "" {
  376. return false
  377. }
  378. return true
  379. }
  380. // Checks if the given rule has a file path regex
  381. func ruleContainPathRegex(rule config.Rule) bool {
  382. if rule.Path == nil {
  383. return false
  384. }
  385. if rule.Path.String() == "" {
  386. return false
  387. }
  388. return true
  389. }
  390. func isCommitAllowListed(commitHash string, allowlistedCommits []string) bool {
  391. for _, hash := range allowlistedCommits {
  392. if commitHash == hash {
  393. return true
  394. }
  395. }
  396. return false
  397. }
  398. func isAllowListed(target string, allowList []*regexp.Regexp) bool {
  399. if len(allowList) != 0 {
  400. for _, re := range allowList {
  401. if re.FindString(target) != "" {
  402. return true
  403. }
  404. }
  405. }
  406. return false
  407. }
  408. func optsToCommits(opts options.Options) ([]string, error) {
  409. if opts.Commits != "" {
  410. return strings.Split(opts.Commits, ","), nil
  411. }
  412. file, err := os.Open(opts.CommitsFile)
  413. if err != nil {
  414. return []string{}, err
  415. }
  416. defer file.Close()
  417. scanner := bufio.NewScanner(file)
  418. var commits []string
  419. for scanner.Scan() {
  420. commits = append(commits, scanner.Text())
  421. }
  422. return commits, nil
  423. }
  424. func extractLine(patchContent string, leak report.Leak, lineLookup map[string]bool) int {
  425. i := strings.Index(patchContent, fmt.Sprintf("\n+++ b/%s", leak.File))
  426. filePatchContent := patchContent[i+1:]
  427. i = strings.Index(filePatchContent, "diff --git")
  428. if i != -1 {
  429. filePatchContent = filePatchContent[:i]
  430. }
  431. chunkStartLine := 0
  432. currLine := 0
  433. for _, patchLine := range strings.Split(filePatchContent, "\n") {
  434. if strings.HasPrefix(patchLine, "@@") {
  435. i := strings.Index(patchLine, diffAddPrefix)
  436. pairs := strings.Split(strings.Split(patchLine[i+1:], diffLineSignature)[0], ",")
  437. chunkStartLine, _ = strconv.Atoi(pairs[0])
  438. currLine = -1
  439. }
  440. if strings.HasPrefix(patchLine, diffDelPrefix) {
  441. currLine--
  442. }
  443. if strings.HasPrefix(patchLine, diffAddPrefix) && strings.Contains(patchLine, leak.Line) {
  444. lineNumber := chunkStartLine + currLine
  445. if _, ok := lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, lineNumber, leak.File)]; !ok {
  446. lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, lineNumber, leak.File)] = true
  447. return lineNumber
  448. }
  449. }
  450. currLine++
  451. }
  452. return defaultLineNumber
  453. }
  454. func leakURL(leak report.Leak) string {
  455. if leak.RepoURL != "" {
  456. return fmt.Sprintf("%s/blob/%s/%s#L%d", leak.RepoURL, leak.Commit, leak.File, leak.LineNumber)
  457. }
  458. return ""
  459. }