utils.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. package scan
  2. import (
  3. "bufio"
  4. "encoding/json"
  5. "fmt"
  6. "math"
  7. "os"
  8. "path/filepath"
  9. "regexp"
  10. "runtime"
  11. "strconv"
  12. "strings"
  13. "time"
  14. "github.com/zricethezav/gitleaks/v7/report"
  15. "github.com/zricethezav/gitleaks/v7/config"
  16. "github.com/zricethezav/gitleaks/v7/options"
  17. "github.com/go-git/go-git/v5"
  18. "github.com/go-git/go-git/v5/plumbing"
  19. "github.com/go-git/go-git/v5/plumbing/object"
  20. "github.com/go-git/go-git/v5/storage/memory"
  21. log "github.com/sirupsen/logrus"
  22. )
  23. const (
  24. diffAddPrefix = "+"
  25. diffDelPrefix = "-"
  26. diffLineSignature = " @@"
  27. defaultLineNumber = 1
  28. maxLineLen = 200
  29. )
  30. func obtainCommit(repo *git.Repository, commitSha string) (*object.Commit, error) {
  31. if commitSha == "latest" {
  32. ref, err := repo.Head()
  33. if err != nil {
  34. return nil, err
  35. }
  36. commitSha = ref.Hash().String()
  37. }
  38. return repo.CommitObject(plumbing.NewHash(commitSha))
  39. }
  40. func getRepoName(opts options.Options) string {
  41. if opts.RepoURL != "" {
  42. return filepath.Base(opts.RepoURL)
  43. }
  44. if opts.Path != "" {
  45. return filepath.Base(opts.Path)
  46. }
  47. if opts.CheckUncommitted() {
  48. dir, _ := os.Getwd()
  49. return filepath.Base(dir)
  50. }
  51. return ""
  52. }
  53. func getRepo(opts options.Options) (*git.Repository, error) {
  54. if opts.OpenLocal() {
  55. if opts.Path != "" {
  56. log.Infof("opening %s\n", opts.Path)
  57. } else {
  58. log.Info("opening .")
  59. }
  60. return git.PlainOpen(opts.Path)
  61. }
  62. if opts.CheckUncommitted() {
  63. // open git repo from PWD
  64. dir, err := os.Getwd()
  65. if err != nil {
  66. return nil, err
  67. }
  68. log.Debugf("opening %s as a repo\n", dir)
  69. return git.PlainOpen(dir)
  70. }
  71. return cloneRepo(opts)
  72. }
  73. func cloneRepo(opts options.Options) (*git.Repository, error) {
  74. cloneOpts, err := opts.CloneOptions()
  75. if err != nil {
  76. return nil, err
  77. }
  78. if opts.ClonePath != "" {
  79. log.Infof("cloning... %s to %s", cloneOpts.URL, opts.ClonePath)
  80. return git.PlainClone(opts.ClonePath, false, cloneOpts)
  81. }
  82. log.Infof("cloning... %s", cloneOpts.URL)
  83. return git.Clone(memory.NewStorage(), nil, cloneOpts)
  84. }
  85. // depthReached checks if i meets the depth (--depth=) if set
  86. func depthReached(i int, opts options.Options) bool {
  87. if opts.Depth != 0 && opts.Depth == i {
  88. log.Warnf("Exceeded depth limit (%d)", i)
  89. return true
  90. }
  91. return false
  92. }
  93. // emptyCommit generates an empty commit used for scanning uncommitted changes
  94. func emptyCommit() *object.Commit {
  95. return &object.Commit{
  96. Hash: plumbing.Hash{},
  97. Message: "",
  98. Author: object.Signature{
  99. Name: "",
  100. Email: "",
  101. When: time.Unix(0, 0).UTC(),
  102. },
  103. }
  104. }
  105. // howManyThreads will return a number 1-GOMAXPROCS which is the number
  106. // of goroutines that will spawn during gitleaks execution
  107. func howManyThreads(threads int) int {
  108. maxThreads := runtime.GOMAXPROCS(0)
  109. if threads == 0 {
  110. return 1
  111. } else if threads > maxThreads {
  112. log.Warnf("%d threads set too high, setting to system max, %d", threads, maxThreads)
  113. return maxThreads
  114. }
  115. return threads
  116. }
  117. func shouldLog(scanner BaseScanner) bool {
  118. if scanner.opts.Verbose && scanner.scannerType != typeRepoScanner &&
  119. scanner.scannerType != typeCommitScanner &&
  120. scanner.scannerType != typeUnstagedScanner &&
  121. scanner.scannerType != typeNoGitScanner {
  122. return true
  123. }
  124. return false
  125. }
  126. func checkRules(scanner BaseScanner, commit *object.Commit, repoName, filePath, content string) []report.Leak {
  127. filename := filepath.Base(filePath)
  128. path := filepath.Dir(filePath)
  129. var leaks []report.Leak
  130. skipRuleLookup := make(map[string]bool)
  131. // First do simple rule checks based on filename
  132. if skipCheck(scanner.cfg, filename, path) {
  133. return leaks
  134. }
  135. for _, rule := range scanner.cfg.Rules {
  136. if isCommitAllowListed(commit.Hash.String(), rule.AllowList.Commits) {
  137. continue
  138. }
  139. if skipRule(rule, filename, filePath, commit.Hash.String()) {
  140. skipRuleLookup[rule.Description] = true
  141. continue
  142. }
  143. // If it doesnt contain a Content regex then it is a filename regex match
  144. if !ruleContainRegex(rule) {
  145. leak := report.Leak{
  146. LineNumber: defaultLineNumber,
  147. Line: "",
  148. Offender: "Filename/path offender: " + filename,
  149. Commit: commit.Hash.String(),
  150. Repo: repoName,
  151. RepoURL: scanner.opts.RepoURL,
  152. Message: commit.Message,
  153. Rule: rule.Description,
  154. Author: commit.Author.Name,
  155. Email: commit.Author.Email,
  156. Date: commit.Author.When,
  157. Tags: strings.Join(rule.Tags, ", "),
  158. File: filePath,
  159. // Operation: diffOpToString(bundle.Operation),
  160. }
  161. leak.LeakURL = leakURL(leak)
  162. if shouldLog(scanner) {
  163. logLeak(leak, scanner.opts.Redact)
  164. }
  165. leaks = append(leaks, leak)
  166. }
  167. }
  168. lineNumber := 1
  169. for _, line := range strings.Split(content, "\n") {
  170. for _, rule := range scanner.cfg.Rules {
  171. if isCommitAllowListed(commit.Hash.String(), rule.AllowList.Commits) {
  172. break
  173. }
  174. if _, ok := skipRuleLookup[rule.Description]; ok {
  175. continue
  176. }
  177. offender := rule.Regex.FindString(line)
  178. if offender == "" {
  179. continue
  180. }
  181. // check entropy
  182. groups := rule.Regex.FindStringSubmatch(offender)
  183. if isAllowListed(line, append(rule.AllowList.Regexes, scanner.cfg.Allowlist.Regexes...)) {
  184. continue
  185. }
  186. if len(rule.Entropies) != 0 && !trippedEntropy(groups, rule) {
  187. continue
  188. }
  189. // 0 is a match for the full regex pattern
  190. if 0 < rule.ReportGroup && rule.ReportGroup < len(groups) {
  191. offender = groups[rule.ReportGroup]
  192. }
  193. leak := report.Leak{
  194. LineNumber: lineNumber,
  195. Line: line,
  196. Offender: offender,
  197. Commit: commit.Hash.String(),
  198. Repo: repoName,
  199. RepoURL: scanner.opts.RepoURL,
  200. Message: commit.Message,
  201. Rule: rule.Description,
  202. Author: commit.Author.Name,
  203. Email: commit.Author.Email,
  204. Date: commit.Author.When,
  205. Tags: strings.Join(rule.Tags, ", "),
  206. File: filePath,
  207. }
  208. leak.LeakURL = leakURL(leak)
  209. if shouldLog(scanner) {
  210. logLeak(leak, scanner.opts.Redact)
  211. }
  212. leaks = append(leaks, leak)
  213. }
  214. lineNumber++
  215. }
  216. return leaks
  217. }
  218. func logLeak(leak report.Leak, redact bool) {
  219. if redact {
  220. leak = report.RedactLeak(leak)
  221. }
  222. var b []byte
  223. b, _ = json.MarshalIndent(leak, "", " ")
  224. fmt.Println(string(b))
  225. }
  226. // getLogOptions determines what log options are used when iterating through commits.
  227. // It is similar to `git log {branch}`. Default behavior is to log ALL branches so
  228. // gitleaks gets the full git history.
  229. func logOptions(repo *git.Repository, opts options.Options) (*git.LogOptions, error) {
  230. var logOpts git.LogOptions
  231. const dateformat string = "2006-01-02"
  232. const timeformat string = "2006-01-02T15:04:05-0700"
  233. if opts.CommitFrom != "" {
  234. logOpts.From = plumbing.NewHash(opts.CommitFrom)
  235. }
  236. if opts.CommitSince != "" {
  237. if t, err := time.Parse(timeformat, opts.CommitSince); err == nil {
  238. logOpts.Since = &t
  239. } else if t, err := time.Parse(dateformat, opts.CommitSince); err == nil {
  240. logOpts.Since = &t
  241. } else {
  242. return nil, err
  243. }
  244. logOpts.All = true
  245. }
  246. if opts.CommitUntil != "" {
  247. if t, err := time.Parse(timeformat, opts.CommitUntil); err == nil {
  248. logOpts.Until = &t
  249. } else if t, err := time.Parse(dateformat, opts.CommitUntil); err == nil {
  250. logOpts.Until = &t
  251. } else {
  252. return nil, err
  253. }
  254. logOpts.All = true
  255. }
  256. if opts.Branch != "" {
  257. ref, err := repo.Storer.Reference(plumbing.NewBranchReferenceName(opts.Branch))
  258. if err != nil {
  259. return nil, fmt.Errorf("could not find branch %s", opts.Branch)
  260. }
  261. logOpts = git.LogOptions{
  262. From: ref.Hash(),
  263. }
  264. if logOpts.From.IsZero() {
  265. return nil, fmt.Errorf("could not find branch %s", opts.Branch)
  266. }
  267. return &logOpts, nil
  268. }
  269. if !logOpts.From.IsZero() || logOpts.Since != nil || logOpts.Until != nil {
  270. return &logOpts, nil
  271. }
  272. return &git.LogOptions{All: true}, nil
  273. }
  274. func skipCheck(cfg config.Config, filename string, path string) bool {
  275. // We want to check if there is a allowlist for this file
  276. if len(cfg.Allowlist.Files) != 0 {
  277. for _, reFileName := range cfg.Allowlist.Files {
  278. if regexMatched(filename, reFileName) {
  279. log.Debugf("allowlisted file found, skipping scan of file: %s", filename)
  280. return true
  281. }
  282. }
  283. }
  284. // We want to check if there is a allowlist for this path
  285. if len(cfg.Allowlist.Paths) != 0 {
  286. for _, reFilePath := range cfg.Allowlist.Paths {
  287. if regexMatched(path, reFilePath) {
  288. log.Debugf("file in allowlisted path found, skipping scan of file: %s", filename)
  289. return true
  290. }
  291. }
  292. }
  293. return false
  294. }
  295. func skipRule(rule config.Rule, filename, path, commitSha string) bool {
  296. // For each rule we want to check filename allowlists
  297. if isAllowListed(filename, rule.AllowList.Files) || isAllowListed(path, rule.AllowList.Paths) {
  298. return true
  299. }
  300. // If it has fileNameRegex and it doesnt match we continue to next rule
  301. if ruleContainFileRegex(rule) && !regexMatched(filename, rule.File) {
  302. return true
  303. }
  304. // If it has filePathRegex and it doesnt match we continue to next rule
  305. if ruleContainPathRegex(rule) && !regexMatched(path, rule.Path) {
  306. return true
  307. }
  308. return false
  309. }
  310. // regexMatched matched an interface to a regular expression. The interface f can
  311. // be a string type or go-git *object.File type.
  312. func regexMatched(f string, re *regexp.Regexp) bool {
  313. if re == nil {
  314. return false
  315. }
  316. if re.FindString(f) != "" {
  317. return true
  318. }
  319. return false
  320. }
  321. // trippedEntropy checks if a given capture group or offender falls in between entropy ranges
  322. // supplied by a custom gitleaks configuration. Gitleaks do not check entropy by default.
  323. func trippedEntropy(groups []string, rule config.Rule) bool {
  324. for _, e := range rule.Entropies {
  325. if len(groups) > e.Group {
  326. entropy := shannonEntropy(groups[e.Group])
  327. if entropy >= e.Min && entropy <= e.Max {
  328. return true
  329. }
  330. }
  331. }
  332. return false
  333. }
  334. // shannonEntropy calculates the entropy of data using the formula defined here:
  335. // https://en.wiktionary.org/wiki/Shannon_entropy
  336. // Another way to think about what this is doing is calculating the number of bits
  337. // needed to on average encode the data. So, the higher the entropy, the more random the data, the
  338. // more bits needed to encode that data.
  339. func shannonEntropy(data string) (entropy float64) {
  340. if data == "" {
  341. return 0
  342. }
  343. charCounts := make(map[rune]int)
  344. for _, char := range data {
  345. charCounts[char]++
  346. }
  347. invLength := 1.0 / float64(len(data))
  348. for _, count := range charCounts {
  349. freq := float64(count) * invLength
  350. entropy -= freq * math.Log2(freq)
  351. }
  352. return entropy
  353. }
  354. // Checks if the given rule has a regex
  355. func ruleContainRegex(rule config.Rule) bool {
  356. if rule.Regex == nil {
  357. return false
  358. }
  359. if rule.Regex.String() == "" {
  360. return false
  361. }
  362. return true
  363. }
  364. // Checks if the given rule has a file name regex
  365. func ruleContainFileRegex(rule config.Rule) bool {
  366. if rule.File == nil {
  367. return false
  368. }
  369. if rule.File.String() == "" {
  370. return false
  371. }
  372. return true
  373. }
  374. // Checks if the given rule has a file path regex
  375. func ruleContainPathRegex(rule config.Rule) bool {
  376. if rule.Path == nil {
  377. return false
  378. }
  379. if rule.Path.String() == "" {
  380. return false
  381. }
  382. return true
  383. }
  384. func isCommitAllowListed(commitHash string, allowlistedCommits []string) bool {
  385. for _, hash := range allowlistedCommits {
  386. if commitHash == hash {
  387. return true
  388. }
  389. }
  390. return false
  391. }
  392. func isAllowListed(target string, allowList []*regexp.Regexp) bool {
  393. if len(allowList) != 0 {
  394. for _, re := range allowList {
  395. if re.FindString(target) != "" {
  396. return true
  397. }
  398. }
  399. }
  400. return false
  401. }
  402. func optsToCommits(opts options.Options) ([]string, error) {
  403. if opts.Commits != "" {
  404. return strings.Split(opts.Commits, ","), nil
  405. }
  406. file, err := os.Open(opts.CommitsFile)
  407. if err != nil {
  408. return []string{}, err
  409. }
  410. defer file.Close()
  411. scanner := bufio.NewScanner(file)
  412. var commits []string
  413. for scanner.Scan() {
  414. commits = append(commits, scanner.Text())
  415. }
  416. return commits, nil
  417. }
  418. func extractLine(patchContent string, leak report.Leak, lineLookup map[string]bool) int {
  419. i := strings.Index(patchContent, fmt.Sprintf("\n+++ b/%s", leak.File))
  420. filePatchContent := patchContent[i+1:]
  421. i = strings.Index(filePatchContent, "diff --git")
  422. if i != -1 {
  423. filePatchContent = filePatchContent[:i]
  424. }
  425. chunkStartLine := 0
  426. currLine := 0
  427. for _, patchLine := range strings.Split(filePatchContent, "\n") {
  428. if strings.HasPrefix(patchLine, "@@") {
  429. i := strings.Index(patchLine, diffAddPrefix)
  430. pairs := strings.Split(strings.Split(patchLine[i+1:], diffLineSignature)[0], ",")
  431. chunkStartLine, _ = strconv.Atoi(pairs[0])
  432. currLine = -1
  433. }
  434. if strings.HasPrefix(patchLine, diffDelPrefix) {
  435. currLine--
  436. }
  437. if strings.HasPrefix(patchLine, diffAddPrefix) && strings.Contains(patchLine, leak.Line) {
  438. lineNumber := chunkStartLine + currLine
  439. if _, ok := lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, lineNumber, leak.File)]; !ok {
  440. lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, lineNumber, leak.File)] = true
  441. return lineNumber
  442. }
  443. }
  444. currLine++
  445. }
  446. return defaultLineNumber
  447. }
  448. func leakURL(leak report.Leak) string {
  449. if leak.RepoURL != "" {
  450. return fmt.Sprintf("%s/blob/%s/%s#L%d", leak.RepoURL, leak.Commit, leak.File, leak.LineNumber)
  451. }
  452. return ""
  453. }