package detect

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"sync"

	"github.com/fatih/semgroup"
	"github.com/gitleaks/go-gitdiff/gitdiff"
	"github.com/h2non/filetype"
	"github.com/rs/zerolog/log"
	"github.com/spf13/viper"
	"github.com/zricethezav/gitleaks/v8/config"
	"github.com/zricethezav/gitleaks/v8/detect/git"
	"github.com/zricethezav/gitleaks/v8/report"
)
// GitScanType is used to differentiate between git scan types:
//   $ gitleaks detect
//   $ gitleaks protect
//   $ gitleaks protect staged
type GitScanType int

const (
	DetectType GitScanType = iota
	ProtectType
	ProtectStagedType

	gitleaksAllowSignature = "gitleaks:allow"
)
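// Any line containing gitleaksAllowSignature is skipped by detectRule below.
// A minimal sketch of how a scanned file might opt a line out of detection
// (the variable name and value are illustrative, not from this codebase):
//
//	apiKey := "example-not-a-real-secret" // gitleaks:allow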
// Detector is the main detector struct
type Detector struct {
	// Config is the configuration for the detector
	Config config.Config

	// Redact is a flag to redact findings. This is exported
	// so users using gitleaks as a library can set this flag
	// without calling `detector.Start(cmd *cobra.Command)`
	Redact bool

	// Verbose is a flag to print findings
	Verbose bool

	// commitMap is used to keep track of commits that have been scanned.
	// This is only used for logging purposes and git scans.
	commitMap map[string]bool

	// findingMutex is to prevent concurrent access to the
	// findings slice when adding findings.
	findingMutex *sync.Mutex

	// findings is a slice of report.Finding. This is the result
	// of the detector's scan which can then be used to generate a
	// report.
	findings []report.Finding
}
// Fragment contains the data to be scanned
type Fragment struct {
	// Raw is the raw content of the fragment
	Raw string

	// FilePath is the path to the file if applicable
	FilePath string

	// CommitSHA is the SHA of the commit if applicable
	CommitSHA string

	// newlineIndices is a list of indices of newlines in the raw content.
	// This is used to calculate the line location of a finding
	newlineIndices [][]int
}
// NewDetector creates a new detector with the given config
func NewDetector(cfg config.Config) *Detector {
	return &Detector{
		commitMap:    make(map[string]bool),
		findingMutex: &sync.Mutex{},
		findings:     make([]report.Finding, 0),
		Config:       cfg,
	}
}
// NewDetectorDefaultConfig creates a new detector with the default config
func NewDetectorDefaultConfig() (*Detector, error) {
	viper.SetConfigType("toml")
	err := viper.ReadConfig(strings.NewReader(config.DefaultConfig))
	if err != nil {
		return nil, err
	}
	var vc config.ViperConfig
	err = viper.Unmarshal(&vc)
	if err != nil {
		return nil, err
	}
	cfg, err := vc.Translate()
	if err != nil {
		return nil, err
	}
	return NewDetector(cfg), nil
}
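// A minimal usage sketch for library consumers: build a detector with the
// bundled default rules and scan an in-memory string. The sample content is
// illustrative; whether it produces findings depends on the configured rules.
//
//	detector, err := NewDetectorDefaultConfig()
//	if err != nil {
//		// handle error loading or translating the default config
//	}
//	findings := detector.DetectString(`token = "example-not-a-real-secret"`)
//	for _, f := range findings {
//		fmt.Println(f.RuleID, f.Secret)
//	}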
// DetectBytes scans the given bytes and returns a list of findings
func (d *Detector) DetectBytes(content []byte) []report.Finding {
	return d.DetectString(string(content))
}

// DetectString scans the given string and returns a list of findings
func (d *Detector) DetectString(content string) []report.Finding {
	return d.Detect(Fragment{
		Raw: content,
	})
}
// detectRule scans the given fragment for the given rule and returns a list of findings
func (d *Detector) detectRule(fragment Fragment, rule *config.Rule) []report.Finding {
	var findings []report.Finding

	// check if filepath or commit is allowed for this rule
	if rule.Allowlist.CommitAllowed(fragment.CommitSHA) ||
		rule.Allowlist.PathAllowed(fragment.FilePath) {
		return findings
	}

	if rule.Path != nil && rule.Regex == nil {
		// Path _only_ rule
		if rule.Path.Match([]byte(fragment.FilePath)) {
			finding := report.Finding{
				Description: rule.Description,
				File:        fragment.FilePath,
				RuleID:      rule.RuleID,
				Match:       fmt.Sprintf("file detected: %s", fragment.FilePath),
				Tags:        rule.Tags,
			}
			return append(findings, finding)
		}
	} else if rule.Path != nil {
		// if path is set _and_ a regex is set, then we need to check both,
		// so if the path does not match, then we should return early and not
		// consider the regex
		if !rule.Path.Match([]byte(fragment.FilePath)) {
			return findings
		}
	}

	// if path only rule, skip content checks
	if rule.Regex == nil {
		return findings
	}
	// keyword pre-filter: only run the rule's regex if the fragment contains
	// at least one of the rule's keywords (case-insensitive)
	containsKeyword := false
	for _, k := range rule.Keywords {
		if strings.Contains(strings.ToLower(fragment.Raw),
			strings.ToLower(k)) {
			containsKeyword = true
			break
		}
	}
	if !containsKeyword && len(rule.Keywords) != 0 {
		return findings
	}
	matchIndices := rule.Regex.FindAllStringIndex(fragment.Raw, -1)
	for _, matchIndex := range matchIndices {
		// extract secret from match
		secret := strings.Trim(fragment.Raw[matchIndex[0]:matchIndex[1]], "\n")

		// determine location of match. Note that the location
		// in the finding will be the line/column numbers of the _match_
		// not the _secret_, which will be different if the secretGroup
		// value is set for this rule
		loc := location(fragment, matchIndex)

		finding := report.Finding{
			Description: rule.Description,
			File:        fragment.FilePath,
			RuleID:      rule.RuleID,
			StartLine:   loc.startLine,
			EndLine:     loc.endLine,
			StartColumn: loc.startColumn,
			EndColumn:   loc.endColumn,
			Secret:      secret,
			Match:       secret,
			Tags:        rule.Tags,
		}

		if strings.Contains(fragment.Raw[loc.startLineIndex:loc.endLineIndex],
			gitleaksAllowSignature) {
			continue
		}

		// extract secret from secret group if set
		if rule.SecretGroup != 0 {
			groups := rule.Regex.FindStringSubmatch(secret)
			if len(groups) <= rule.SecretGroup || len(groups) == 0 {
				// Config validation should prevent this
				continue
			}
			secret = groups[rule.SecretGroup]
			finding.Secret = secret
		}

		// check if the secret is in the allowlist
		if rule.Allowlist.RegexAllowed(finding.Secret) ||
			d.Config.Allowlist.RegexAllowed(finding.Secret) {
			continue
		}

		// check entropy
		entropy := shannonEntropy(finding.Secret)
		finding.Entropy = float32(entropy)
		if rule.Entropy != 0.0 {
			if entropy <= rule.Entropy {
				// entropy is too low, skip this finding
				continue
			}
			// NOTE: this is a workaround for the fact that Go's regexp engine
			// does not support positive lookaheads. Ideally we would add a
			// restriction on generic rules' regexes requiring that the secret
			// match group contain both digits and alphabetical characters,
			// not just alphabetical characters. Instead, if the rule ID is
			// prefixed with "generic", we require that the secret contains
			// at least one digit.
			// TODO: this should be replaced with stop words
			if strings.HasPrefix(rule.RuleID, "generic") {
				if !containsDigit(secret) {
					continue
				}
			}
		}
		findings = append(findings, finding)
	}
	return findings
}
// DetectGit accepts a source directory, git log options, and a scan type.
// Depending on the scan type it obtains patches from `git log -p` (detect)
// or `git diff` (protect, optionally staged), receives each patch as a
// *gitdiff.File on a channel, and scans every file (patch) for findings.
func (d *Detector) DetectGit(source string, logOpts string, gitScanType GitScanType) ([]report.Finding, error) {
	var (
		gitdiffFiles <-chan *gitdiff.File
		err          error
	)
	switch gitScanType {
	case DetectType:
		gitdiffFiles, err = git.GitLog(source, logOpts)
		if err != nil {
			return d.findings, err
		}
	case ProtectType:
		gitdiffFiles, err = git.GitDiff(source, false)
		if err != nil {
			return d.findings, err
		}
	case ProtectStagedType:
		gitdiffFiles, err = git.GitDiff(source, true)
		if err != nil {
			return d.findings, err
		}
	}

	s := semgroup.NewGroup(context.Background(), 4)
	for gitdiffFile := range gitdiffFiles {
		// shadow the loop variable so the closure below captures the current file
		gitdiffFile := gitdiffFile

		// skip binary files
		if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
			continue
		}

		// Check if commit is allowed
		commitSHA := ""
		if gitdiffFile.PatchHeader != nil {
			commitSHA = gitdiffFile.PatchHeader.SHA
			if d.Config.Allowlist.CommitAllowed(gitdiffFile.PatchHeader.SHA) {
				continue
			}
		}
		d.addCommit(commitSHA)

		s.Go(func() error {
			for _, textFragment := range gitdiffFile.TextFragments {
				if textFragment == nil {
					return nil
				}
				fragment := Fragment{
					Raw:       textFragment.Raw(gitdiff.OpAdd),
					CommitSHA: commitSHA,
					FilePath:  gitdiffFile.NewName,
				}
				for _, finding := range d.Detect(fragment) {
					d.addFinding(augmentGitFinding(finding, textFragment, gitdiffFile))
				}
			}
			return nil
		})
	}

	if err := s.Wait(); err != nil {
		return d.findings, err
	}
	log.Debug().Msgf("%d commits scanned. Note: this number might be smaller than expected due to commits with no additions", len(d.commitMap))
	return d.findings, nil
}
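// A minimal usage sketch for a git history scan. The repository path and log
// options are illustrative; error handling is elided:
//
//	detector, _ := NewDetectorDefaultConfig()
//	findings, err := detector.DetectGit("path/to/repo", "--all", DetectType)
//	if err != nil {
//		// handle git or scan error
//	}
//	_ = findings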
// DetectFiles accepts a path to a source directory or file and begins a scan of the
// file or directory.
func (d *Detector) DetectFiles(source string) ([]report.Finding, error) {
	s := semgroup.NewGroup(context.Background(), 4)
	paths := make(chan string)
	s.Go(func() error {
		defer close(paths)
		return filepath.Walk(source,
			func(path string, fInfo os.FileInfo, err error) error {
				if err != nil {
					return err
				}
				if fInfo.Name() == ".git" {
					return filepath.SkipDir
				}
				if fInfo.Mode().IsRegular() {
					paths <- path
				}
				return nil
			})
	})

	for pa := range paths {
		p := pa
		s.Go(func() error {
			b, err := os.ReadFile(p)
			if err != nil {
				return err
			}

			mimetype, err := filetype.Match(b)
			if err != nil {
				return err
			}
			if mimetype.MIME.Type == "application" {
				return nil // skip binary files
			}

			fragment := Fragment{
				Raw:      string(b),
				FilePath: p,
			}
			for _, finding := range d.Detect(fragment) {
				// need to add 1 since line counting starts at 1
				finding.EndLine++
				finding.StartLine++
				d.addFinding(finding)
			}
			return nil
		})
	}

	if err := s.Wait(); err != nil {
		return d.findings, err
	}
	return d.findings, nil
}
// Detect scans the given fragment and returns a list of findings
func (d *Detector) Detect(fragment Fragment) []report.Finding {
	var findings []report.Finding

	// check if filepath is allowed
	if fragment.FilePath != "" && (d.Config.Allowlist.PathAllowed(fragment.FilePath) ||
		fragment.FilePath == d.Config.Path) {
		return findings
	}

	// add newline indices for location calculation in detectRule
	fragment.newlineIndices = regexp.MustCompile("\n").FindAllStringIndex(fragment.Raw, -1)

	for _, rule := range d.Config.Rules {
		findings = append(findings, d.detectRule(fragment, rule)...)
	}
	return filter(findings, d.Redact)
}
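// A minimal sketch of scanning raw content with a file path attached, so that
// path-based rules and path allowlists are applied. The `detector`, path, and
// content are illustrative:
//
//	fragment := Fragment{
//		Raw:      `password = "hunter2"`,
//		FilePath: "config/settings.py",
//	}
//	findings := detector.Detect(fragment)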
// addFinding synchronously adds a finding to the findings slice
func (d *Detector) addFinding(finding report.Finding) {
	d.findingMutex.Lock()
	d.findings = append(d.findings, finding)
	if d.Verbose {
		printFinding(finding)
	}
	d.findingMutex.Unlock()
}
// addCommit adds a commit SHA to the commit map. It is only called from the
// main DetectGit loop, never from the scan goroutines, so no locking is required.
func (d *Detector) addCommit(commit string) {
	d.commitMap[commit] = true
}