|
|
@@ -2,20 +2,14 @@ package scan
|
|
|
|
|
|
import (
|
|
|
"bufio"
|
|
|
- "encoding/json"
|
|
|
"fmt"
|
|
|
- "math"
|
|
|
"os"
|
|
|
"path/filepath"
|
|
|
- "regexp"
|
|
|
"runtime"
|
|
|
"strconv"
|
|
|
"strings"
|
|
|
"time"
|
|
|
|
|
|
- "github.com/zricethezav/gitleaks/v7/report"
|
|
|
-
|
|
|
- "github.com/zricethezav/gitleaks/v7/config"
|
|
|
"github.com/zricethezav/gitleaks/v7/options"
|
|
|
|
|
|
"github.com/go-git/go-git/v5"
|
|
|
@@ -30,8 +24,6 @@ const (
|
|
|
diffDelPrefix = "-"
|
|
|
diffLineSignature = " @@"
|
|
|
defaultLineNumber = 1
|
|
|
-
|
|
|
- maxLineLen = 200
|
|
|
)
|
|
|
|
|
|
func obtainCommit(repo *git.Repository, commitSha string) (*object.Commit, error) {
|
|
|
@@ -128,128 +120,6 @@ func howManyThreads(threads int) int {
|
|
|
return threads
|
|
|
}
|
|
|
|
|
|
-func shouldLog(scanner BaseScanner) bool {
|
|
|
- if scanner.opts.Verbose && scanner.scannerType != typeRepoScanner &&
|
|
|
- scanner.scannerType != typeCommitScanner &&
|
|
|
- scanner.scannerType != typeUnstagedScanner &&
|
|
|
- scanner.scannerType != typeNoGitScanner {
|
|
|
- return true
|
|
|
- }
|
|
|
- return false
|
|
|
-}
|
|
|
-
|
|
|
-func checkRules(scanner BaseScanner, commit *object.Commit, repoName, filePath, content string) []report.Leak {
|
|
|
- filename := filepath.Base(filePath)
|
|
|
- path := filepath.Dir(filePath)
|
|
|
- var leaks []report.Leak
|
|
|
-
|
|
|
- skipRuleLookup := make(map[string]bool)
|
|
|
- // First do simple rule checks based on filename
|
|
|
- if skipCheck(scanner.cfg, filename, path) {
|
|
|
- return leaks
|
|
|
- }
|
|
|
-
|
|
|
- for _, rule := range scanner.cfg.Rules {
|
|
|
- if isCommitAllowListed(commit.Hash.String(), rule.AllowList.Commits) {
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- if skipRule(rule, filename, filePath, commit.Hash.String()) {
|
|
|
- skipRuleLookup[rule.Description] = true
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- // If it doesnt contain a Content regex then it is a filename regex match
|
|
|
- if !ruleContainRegex(rule) {
|
|
|
- leak := report.Leak{
|
|
|
- LineNumber: defaultLineNumber,
|
|
|
- Line: "",
|
|
|
- Offender: "Filename/path offender: " + filename,
|
|
|
- Commit: commit.Hash.String(),
|
|
|
- Repo: repoName,
|
|
|
- RepoURL: scanner.opts.RepoURL,
|
|
|
- Message: commit.Message,
|
|
|
- Rule: rule.Description,
|
|
|
- Author: commit.Author.Name,
|
|
|
- Email: commit.Author.Email,
|
|
|
- Date: commit.Author.When,
|
|
|
- Tags: strings.Join(rule.Tags, ", "),
|
|
|
- File: filePath,
|
|
|
- // Operation: diffOpToString(bundle.Operation),
|
|
|
- }
|
|
|
- leak.LeakURL = leakURL(leak)
|
|
|
- if shouldLog(scanner) {
|
|
|
- logLeak(leak, scanner.opts.Redact)
|
|
|
- }
|
|
|
- leaks = append(leaks, leak)
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- lineNumber := 1
|
|
|
-
|
|
|
- for _, line := range strings.Split(content, "\n") {
|
|
|
- for _, rule := range scanner.cfg.Rules {
|
|
|
- if isCommitAllowListed(commit.Hash.String(), rule.AllowList.Commits) {
|
|
|
- break
|
|
|
- }
|
|
|
- if _, ok := skipRuleLookup[rule.Description]; ok {
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- offender := rule.Regex.FindString(line)
|
|
|
- if offender == "" {
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- // check entropy
|
|
|
- groups := rule.Regex.FindStringSubmatch(offender)
|
|
|
- if isAllowListed(line, append(rule.AllowList.Regexes, scanner.cfg.Allowlist.Regexes...)) {
|
|
|
- continue
|
|
|
- }
|
|
|
- if len(rule.Entropies) != 0 && !trippedEntropy(groups, rule) {
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- // 0 is a match for the full regex pattern
|
|
|
- if 0 < rule.ReportGroup && rule.ReportGroup < len(groups) {
|
|
|
- offender = groups[rule.ReportGroup]
|
|
|
- }
|
|
|
-
|
|
|
- leak := report.Leak{
|
|
|
- LineNumber: lineNumber,
|
|
|
- Line: line,
|
|
|
- Offender: offender,
|
|
|
- Commit: commit.Hash.String(),
|
|
|
- Repo: repoName,
|
|
|
- RepoURL: scanner.opts.RepoURL,
|
|
|
- Message: commit.Message,
|
|
|
- Rule: rule.Description,
|
|
|
- Author: commit.Author.Name,
|
|
|
- Email: commit.Author.Email,
|
|
|
- Date: commit.Author.When,
|
|
|
- Tags: strings.Join(rule.Tags, ", "),
|
|
|
- File: filePath,
|
|
|
- }
|
|
|
- leak.LeakURL = leakURL(leak)
|
|
|
- if shouldLog(scanner) {
|
|
|
- logLeak(leak, scanner.opts.Redact)
|
|
|
- }
|
|
|
- leaks = append(leaks, leak)
|
|
|
- }
|
|
|
- lineNumber++
|
|
|
- }
|
|
|
- return leaks
|
|
|
-}
|
|
|
-
|
|
|
-func logLeak(leak report.Leak, redact bool) {
|
|
|
- if redact {
|
|
|
- leak = report.RedactLeak(leak)
|
|
|
- }
|
|
|
- var b []byte
|
|
|
- b, _ = json.MarshalIndent(leak, "", " ")
|
|
|
- fmt.Println(string(b))
|
|
|
-}
|
|
|
-
|
|
|
// getLogOptions determines what log options are used when iterating through commits.
|
|
|
// It is similar to `git log {branch}`. Default behavior is to log ALL branches so
|
|
|
// gitleaks gets the full git history.
|
|
|
@@ -300,152 +170,6 @@ func logOptions(repo *git.Repository, opts options.Options) (*git.LogOptions, er
|
|
|
return &git.LogOptions{All: true}, nil
|
|
|
}
|
|
|
|
|
|
-func skipCheck(cfg config.Config, filename string, path string) bool {
|
|
|
- // We want to check if there is a allowlist for this file
|
|
|
- if len(cfg.Allowlist.Files) != 0 {
|
|
|
- for _, reFileName := range cfg.Allowlist.Files {
|
|
|
- if regexMatched(filename, reFileName) {
|
|
|
- log.Debugf("allowlisted file found, skipping scan of file: %s", filename)
|
|
|
- return true
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // We want to check if there is a allowlist for this path
|
|
|
- if len(cfg.Allowlist.Paths) != 0 {
|
|
|
- for _, reFilePath := range cfg.Allowlist.Paths {
|
|
|
- if regexMatched(path, reFilePath) {
|
|
|
- log.Debugf("file in allowlisted path found, skipping scan of file: %s", filename)
|
|
|
- return true
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- return false
|
|
|
-}
|
|
|
-
|
|
|
-func skipRule(rule config.Rule, filename, path, commitSha string) bool {
|
|
|
- // For each rule we want to check filename allowlists
|
|
|
- if isAllowListed(filename, rule.AllowList.Files) || isAllowListed(path, rule.AllowList.Paths) {
|
|
|
- return true
|
|
|
- }
|
|
|
-
|
|
|
- // If it has fileNameRegex and it doesnt match we continue to next rule
|
|
|
- if ruleContainFileRegex(rule) && !regexMatched(filename, rule.File) {
|
|
|
- return true
|
|
|
- }
|
|
|
-
|
|
|
- // If it has filePathRegex and it doesnt match we continue to next rule
|
|
|
- if ruleContainPathRegex(rule) && !regexMatched(path, rule.Path) {
|
|
|
- return true
|
|
|
- }
|
|
|
-
|
|
|
- return false
|
|
|
-}
|
|
|
-
|
|
|
-// regexMatched matched an interface to a regular expression. The interface f can
|
|
|
-// be a string type or go-git *object.File type.
|
|
|
-func regexMatched(f string, re *regexp.Regexp) bool {
|
|
|
- if re == nil {
|
|
|
- return false
|
|
|
- }
|
|
|
- if re.FindString(f) != "" {
|
|
|
- return true
|
|
|
- }
|
|
|
- return false
|
|
|
-}
|
|
|
-
|
|
|
-// trippedEntropy checks if a given capture group or offender falls in between entropy ranges
|
|
|
-// supplied by a custom gitleaks configuration. Gitleaks do not check entropy by default.
|
|
|
-func trippedEntropy(groups []string, rule config.Rule) bool {
|
|
|
- for _, e := range rule.Entropies {
|
|
|
- if len(groups) > e.Group {
|
|
|
- entropy := shannonEntropy(groups[e.Group])
|
|
|
- if entropy >= e.Min && entropy <= e.Max {
|
|
|
- return true
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- return false
|
|
|
-}
|
|
|
-
|
|
|
-// shannonEntropy calculates the entropy of data using the formula defined here:
|
|
|
-// https://en.wiktionary.org/wiki/Shannon_entropy
|
|
|
-// Another way to think about what this is doing is calculating the number of bits
|
|
|
-// needed to on average encode the data. So, the higher the entropy, the more random the data, the
|
|
|
-// more bits needed to encode that data.
|
|
|
-func shannonEntropy(data string) (entropy float64) {
|
|
|
- if data == "" {
|
|
|
- return 0
|
|
|
- }
|
|
|
-
|
|
|
- charCounts := make(map[rune]int)
|
|
|
- for _, char := range data {
|
|
|
- charCounts[char]++
|
|
|
- }
|
|
|
-
|
|
|
- invLength := 1.0 / float64(len(data))
|
|
|
- for _, count := range charCounts {
|
|
|
- freq := float64(count) * invLength
|
|
|
- entropy -= freq * math.Log2(freq)
|
|
|
- }
|
|
|
-
|
|
|
- return entropy
|
|
|
-}
|
|
|
-
|
|
|
-// Checks if the given rule has a regex
|
|
|
-func ruleContainRegex(rule config.Rule) bool {
|
|
|
- if rule.Regex == nil {
|
|
|
- return false
|
|
|
- }
|
|
|
- if rule.Regex.String() == "" {
|
|
|
- return false
|
|
|
- }
|
|
|
- return true
|
|
|
-}
|
|
|
-
|
|
|
-// Checks if the given rule has a file name regex
|
|
|
-func ruleContainFileRegex(rule config.Rule) bool {
|
|
|
- if rule.File == nil {
|
|
|
- return false
|
|
|
- }
|
|
|
- if rule.File.String() == "" {
|
|
|
- return false
|
|
|
- }
|
|
|
- return true
|
|
|
-}
|
|
|
-
|
|
|
-// Checks if the given rule has a file path regex
|
|
|
-func ruleContainPathRegex(rule config.Rule) bool {
|
|
|
- if rule.Path == nil {
|
|
|
- return false
|
|
|
- }
|
|
|
- if rule.Path.String() == "" {
|
|
|
- return false
|
|
|
- }
|
|
|
- return true
|
|
|
-}
|
|
|
-
|
|
|
-func isCommitAllowListed(commitHash string, allowlistedCommits []string) bool {
|
|
|
- for _, hash := range allowlistedCommits {
|
|
|
- if commitHash == hash {
|
|
|
- return true
|
|
|
- }
|
|
|
- }
|
|
|
- return false
|
|
|
-}
|
|
|
-
|
|
|
-func isAllowListed(target string, allowList []*regexp.Regexp) bool {
|
|
|
- if len(allowList) != 0 {
|
|
|
- for _, re := range allowList {
|
|
|
- if re.FindString(target) != "" {
|
|
|
- return true
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- return false
|
|
|
-
|
|
|
-}
|
|
|
-
|
|
|
func optsToCommits(opts options.Options) ([]string, error) {
|
|
|
if opts.Commits != "" {
|
|
|
return strings.Split(opts.Commits, ","), nil
|
|
|
@@ -464,7 +188,7 @@ func optsToCommits(opts options.Options) ([]string, error) {
|
|
|
return commits, nil
|
|
|
}
|
|
|
|
|
|
-func extractLine(patchContent string, leak report.Leak, lineLookup map[string]bool) int {
|
|
|
+func extractLine(patchContent string, leak Leak, lineLookup map[string]bool) int {
|
|
|
i := strings.Index(patchContent, fmt.Sprintf("\n+++ b/%s", leak.File))
|
|
|
filePatchContent := patchContent[i+1:]
|
|
|
i = strings.Index(filePatchContent, "diff --git")
|
|
|
@@ -494,10 +218,3 @@ func extractLine(patchContent string, leak report.Leak, lineLookup map[string]bo
|
|
|
}
|
|
|
return defaultLineNumber
|
|
|
}
|
|
|
-
|
|
|
-func leakURL(leak report.Leak) string {
|
|
|
- if leak.RepoURL != "" {
|
|
|
- return fmt.Sprintf("%s/blob/%s/%s#L%d", leak.RepoURL, leak.Commit, leak.File, leak.LineNumber)
|
|
|
- }
|
|
|
- return ""
|
|
|
-}
|