detect.go 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992
  1. package detect
  2. import (
  3. "bufio"
  4. "context"
  5. "fmt"
  6. "os"
  7. "strings"
  8. "sync"
  9. "sync/atomic"
  10. "time"
  11. "github.com/zricethezav/gitleaks/v8/config"
  12. "github.com/zricethezav/gitleaks/v8/detect/codec"
  13. "github.com/zricethezav/gitleaks/v8/logging"
  14. "github.com/zricethezav/gitleaks/v8/regexp"
  15. "github.com/zricethezav/gitleaks/v8/report"
  16. "github.com/zricethezav/gitleaks/v8/sources"
  17. ahocorasick "github.com/BobuSumisu/aho-corasick"
  18. tiktoken_loader "github.com/pkoukk/tiktoken-go-loader"
  19. "github.com/zricethezav/icanhazwordz"
  20. "github.com/agnivade/levenshtein"
  21. "github.com/fatih/semgroup"
  22. "github.com/pkoukk/tiktoken-go"
  23. "github.com/rs/zerolog"
  24. "github.com/spf13/viper"
  25. "golang.org/x/exp/maps"
  26. )
const (
	// gitleaksAllowSignature is the inline marker that flags a line as an
	// intentional false positive; findings on lines containing it are skipped
	// unless IgnoreGitleaksAllow is set.
	gitleaksAllowSignature = "gitleaks:allow"
	// SlowWarningThreshold is the amount of time to wait before logging that a file is slow.
	// This is useful for identifying problematic files and tuning the allowlist.
	SlowWarningThreshold = 5 * time.Second
)
var (
	// newLineRegexp finds newline offsets in a fragment so that match byte
	// indices can be translated into line/column locations.
	newLineRegexp = regexp.MustCompile("\n")
)
// Detector is the main detector struct
type Detector struct {
	// Config is the configuration for the detector
	Config config.Config

	// Redact is a flag to redact findings. This is exported
	// so users using gitleaks as a library can set this flag
	// without calling `detector.Start(cmd *cobra.Command)`
	Redact uint

	// Verbose is a flag to print findings as they are added.
	Verbose bool

	// MaxDecodeDepths limits how many recursive decoding passes are allowed
	MaxDecodeDepth int

	// MaxArchiveDepth limits how deep the sources will explore nested archives
	MaxArchiveDepth int

	// MaxTargetMegaBytes skips fragments larger than this size (in MB);
	// zero disables the limit.
	MaxTargetMegaBytes int

	// FollowSymlinks is a flag to enable scanning symlink files
	FollowSymlinks bool

	// NoColor is a flag to disable color output
	NoColor bool

	// IgnoreGitleaksAllow is a flag to ignore gitleaks:allow comments.
	IgnoreGitleaksAllow bool

	// commitMutex is to prevent concurrent access to the
	// commit map when adding commits
	commitMutex *sync.Mutex

	// commitMap is used to keep track of commits that have been scanned.
	// This is only used for logging purposes and git scans.
	commitMap map[string]bool

	// findingMutex is to prevent concurrent access to the
	// findings slice when adding findings.
	findingMutex *sync.Mutex

	// findings is a slice of report.Findings. This is the result
	// of the detector's scan which can then be used to generate a
	// report.
	findings []report.Finding

	// prefilter is a ahocorasick struct used for doing efficient string
	// matching given a set of words (keywords from the rules in the config)
	prefilter ahocorasick.Trie

	// baseline is a list of known findings that should be ignored.
	baseline []report.Finding

	// baselinePath is the path to the baseline file; fragments matching this
	// path are skipped during detection.
	baselinePath string

	// gitleaksIgnore holds fingerprints loaded from .gitleaksignore; findings
	// whose fingerprint is present here are dropped.
	gitleaksIgnore map[string]struct{}

	// Sema (https://github.com/fatih/semgroup) controls the concurrency
	Sema *semgroup.Group

	// report-related settings.
	ReportPath string
	Reporter   report.Reporter

	// TotalBytes counts the raw bytes inspected across all fragments.
	TotalBytes atomic.Uint64

	// tokenizer and nltkSearcher back the experimental "smart filter"
	// heuristic (see passesSmartFilter).
	tokenizer    *tiktoken.Tiktoken
	nltkSearcher *icanhazwordz.Searcher
}
// Fragment is an alias for sources.Fragment for backwards compatibility.
//
// Deprecated: This will be replaced with sources.Fragment in v9.
type Fragment sources.Fragment
  93. // NewDetector creates a new detector with the given config
  94. func NewDetector(cfg config.Config) *Detector {
  95. // grab offline tiktoken encoder
  96. tiktoken.SetBpeLoader(tiktoken_loader.NewOfflineLoader())
  97. tke, err := tiktoken.GetEncoding("cl100k_base")
  98. if err != nil {
  99. logging.Warn().Err(err).Msgf("Could not pull down cl100k_base tiktokenizer")
  100. }
  101. return &Detector{
  102. commitMap: make(map[string]bool),
  103. gitleaksIgnore: make(map[string]struct{}),
  104. findingMutex: &sync.Mutex{},
  105. commitMutex: &sync.Mutex{},
  106. findings: make([]report.Finding, 0),
  107. Config: cfg,
  108. prefilter: *ahocorasick.NewTrieBuilder().AddStrings(maps.Keys(cfg.Keywords)).Build(),
  109. Sema: semgroup.NewGroup(context.Background(), 40),
  110. // tokenizer and nltkSearcher are used for a generic filter
  111. tokenizer: tke,
  112. nltkSearcher: icanhazwordz.NewSearcher(icanhazwordz.Filter{MinLength: 4, PreferLongestNonOverlapping: true}),
  113. }
  114. }
  115. // NewDetectorDefaultConfig creates a new detector with the default config
  116. func NewDetectorDefaultConfig() (*Detector, error) {
  117. viper.SetConfigType("toml")
  118. err := viper.ReadConfig(strings.NewReader(config.DefaultConfig))
  119. if err != nil {
  120. return nil, err
  121. }
  122. var vc config.ViperConfig
  123. err = viper.Unmarshal(&vc)
  124. if err != nil {
  125. return nil, err
  126. }
  127. cfg, err := vc.Translate()
  128. if err != nil {
  129. return nil, err
  130. }
  131. return NewDetector(cfg), nil
  132. }
  133. func (d *Detector) AddGitleaksIgnore(gitleaksIgnorePath string) error {
  134. logging.Debug().Str("path", gitleaksIgnorePath).Msgf("found .gitleaksignore file")
  135. file, err := os.Open(gitleaksIgnorePath)
  136. if err != nil {
  137. return err
  138. }
  139. defer func() {
  140. // https://github.com/securego/gosec/issues/512
  141. if err := file.Close(); err != nil {
  142. logging.Warn().Err(err).Msgf("Error closing .gitleaksignore file")
  143. }
  144. }()
  145. scanner := bufio.NewScanner(file)
  146. replacer := strings.NewReplacer("\\", "/")
  147. for scanner.Scan() {
  148. line := strings.TrimSpace(scanner.Text())
  149. // Skip lines that start with a comment
  150. if line == "" || strings.HasPrefix(line, "#") {
  151. continue
  152. }
  153. // Normalize the path.
  154. // TODO: Make this a breaking change in v9.
  155. s := strings.Split(line, ":")
  156. switch len(s) {
  157. case 3:
  158. // Global fingerprint.
  159. // `file:rule-id:start-line`
  160. s[0] = replacer.Replace(s[0])
  161. case 4:
  162. // Commit fingerprint.
  163. // `commit:file:rule-id:start-line`
  164. s[1] = replacer.Replace(s[1])
  165. default:
  166. logging.Warn().Str("fingerprint", line).Msg("Invalid .gitleaksignore entry")
  167. }
  168. d.gitleaksIgnore[strings.Join(s, ":")] = struct{}{}
  169. }
  170. return nil
  171. }
  172. // DetectBytes scans the given bytes and returns a list of findings
  173. func (d *Detector) DetectBytes(content []byte) []report.Finding {
  174. return d.DetectString(string(content))
  175. }
  176. // DetectString scans the given string and returns a list of findings
  177. func (d *Detector) DetectString(content string) []report.Finding {
  178. return d.Detect(Fragment{
  179. Raw: content,
  180. })
  181. }
// DetectSource scans the given source and returns a list of findings
// accumulated on the detector. Per-fragment errors are logged and swallowed
// so one bad fragment does not abort the whole scan; only the error returned
// by source.Fragments itself is propagated.
func (d *Detector) DetectSource(ctx context.Context, source sources.Source) ([]report.Finding, error) {
	err := source.Fragments(ctx, func(fragment sources.Fragment, err error) error {
		logContext := logging.With()
		if len(fragment.FilePath) > 0 {
			logContext = logContext.Str("path", fragment.FilePath)
		}
		if len(fragment.CommitSHA) > 6 {
			// Abbreviate normal-length SHAs to 7 characters for logging.
			logContext = logContext.Str("commit", fragment.CommitSHA[:7])
			d.addCommit(fragment.CommitSHA)
		} else if len(fragment.CommitSHA) > 0 {
			// Unusually short SHA: log it verbatim and warn.
			logContext = logContext.Str("commit", fragment.CommitSHA)
			d.addCommit(fragment.CommitSHA)
			logger := logContext.Logger()
			logger.Warn().Msg("commit SHAs should be >= 7 characters long")
		}
		logger := logContext.Logger()
		if err != nil {
			// Log the error and move on to the next fragment
			logger.Error().Err(err).Send()
			return nil
		}

		// both the fragment's content and path should be empty for it to be
		// considered empty at this point because of path based matches
		if len(fragment.Raw) == 0 && len(fragment.FilePath) == 0 {
			logger.Trace().Msg("skipping empty fragment")
			return nil
		}

		var timer *time.Timer
		// Only start the slow-fragment warning timer in debug mode.
		if logger.GetLevel() <= zerolog.DebugLevel {
			timer = time.AfterFunc(SlowWarningThreshold, func() {
				logger.Debug().Msgf("Taking longer than %s to inspect fragment", SlowWarningThreshold.String())
			})
		}

		for _, finding := range d.Detect(Fragment(fragment)) {
			d.AddFinding(finding)
		}

		// Stop the timer if it was created
		if timer != nil {
			timer.Stop()
		}
		return nil
	})

	if _, isGit := source.(*sources.Git); isGit {
		logging.Info().Msgf("%d commits scanned.", len(d.commitMap))
		logging.Debug().Msg("Note: this number might be smaller than expected due to commits with no additions")
	}
	return d.Findings(), err
}
// Detect scans the given fragment and returns a list of findings.
//
// The fragment is scanned once per decoding pass: the raw text is scanned
// first, then (up to MaxDecodeDepth times) any encoded segments are decoded
// and re-scanned so that secrets hidden behind e.g. base64 are still found.
// Findings are redacted per d.Redact before being returned.
func (d *Detector) Detect(fragment Fragment) []report.Finding {
	// Count the bytes being inspected; Raw and Bytes are alternative
	// representations, so only one of them is tallied.
	if fragment.Bytes == nil {
		d.TotalBytes.Add(uint64(len(fragment.Raw)))
	}
	d.TotalBytes.Add(uint64(len(fragment.Bytes)))

	var (
		findings []report.Finding
		logger   = func() zerolog.Logger {
			l := logging.With().Str("path", fragment.FilePath)
			if fragment.CommitSHA != "" {
				l = l.Str("commit", fragment.CommitSHA)
			}
			return l.Logger()
		}()
	)

	// check if filepath is allowed
	if fragment.FilePath != "" {
		// is the path our config or baseline file?
		if fragment.FilePath == d.Config.Path || (d.baselinePath != "" && fragment.FilePath == d.baselinePath) {
			logging.Trace().Msg("skipping file: matches config or baseline path")
			return findings
		}
	}

	// check if commit or filepath is allowed.
	if isAllowed, event := checkCommitOrPathAllowed(logger, fragment, d.Config.Allowlists); isAllowed {
		event.Msg("skipping file: global allowlist")
		return findings
	}

	// setup variables to handle different decoding passes
	currentRaw := fragment.Raw
	encodedSegments := []*codec.EncodedSegment{}
	currentDecodeDepth := 0
	decoder := codec.NewDecoder()

	for {
		// build keyword map for prefiltering rules: every keyword the
		// Aho-Corasick trie finds in the (lowercased) text is recorded so
		// rules whose keywords are absent can be skipped entirely.
		keywords := make(map[string]bool)
		normalizedRaw := strings.ToLower(currentRaw)
		matches := d.prefilter.MatchString(normalizedRaw)
		for _, m := range matches {
			keywords[normalizedRaw[m.Pos():int(m.Pos())+len(m.Match())]] = true
		}

		for _, rule := range d.Config.Rules {
			if len(rule.Keywords) == 0 {
				// if no keywords are associated with the rule always scan the
				// fragment using the rule
				findings = append(findings, d.detectRule(fragment, currentRaw, rule, encodedSegments)...)
				continue
			}

			// check if keywords are in the fragment
			for _, k := range rule.Keywords {
				if _, ok := keywords[strings.ToLower(k)]; ok {
					findings = append(findings, d.detectRule(fragment, currentRaw, rule, encodedSegments)...)
					break
				}
			}
		}

		// increment the depth by 1 as we start our decoding pass
		currentDecodeDepth++

		// stop the loop if we've hit our max decoding depth
		if currentDecodeDepth > d.MaxDecodeDepth {
			break
		}

		// decode the currentRaw for the next pass
		currentRaw, encodedSegments = decoder.Decode(currentRaw, encodedSegments)

		// stop the loop when there's nothing else to decode
		if len(encodedSegments) == 0 {
			break
		}
	}

	return filter(findings, d.Redact)
}
  304. // detectRule scans the given fragment for the given rule and returns a list of findings
  305. func (d *Detector) detectRule(fragment Fragment, currentRaw string, r config.Rule, encodedSegments []*codec.EncodedSegment) []report.Finding {
  306. var (
  307. findings []report.Finding
  308. logger = func() zerolog.Logger {
  309. l := logging.With().Str("rule-id", r.RuleID).Str("path", fragment.FilePath)
  310. if fragment.CommitSHA != "" {
  311. l = l.Str("commit", fragment.CommitSHA)
  312. }
  313. return l.Logger()
  314. }()
  315. )
  316. if r.SkipReport && !fragment.InheritedFromFinding {
  317. return findings
  318. }
  319. // check if commit or file is allowed for this rule.
  320. if isAllowed, event := checkCommitOrPathAllowed(logger, fragment, r.Allowlists); isAllowed {
  321. event.Msg("skipping file: rule allowlist")
  322. return findings
  323. }
  324. if r.Path != nil {
  325. if r.Regex == nil && len(encodedSegments) == 0 {
  326. // Path _only_ rule
  327. if r.Path.MatchString(fragment.FilePath) || (fragment.WindowsFilePath != "" && r.Path.MatchString(fragment.WindowsFilePath)) {
  328. finding := report.Finding{
  329. Commit: fragment.CommitSHA,
  330. RuleID: r.RuleID,
  331. Description: r.Description,
  332. File: fragment.FilePath,
  333. SymlinkFile: fragment.SymlinkFile,
  334. Match: "file detected: " + fragment.FilePath,
  335. Tags: r.Tags,
  336. }
  337. if fragment.CommitInfo != nil {
  338. finding.Author = fragment.CommitInfo.AuthorName
  339. finding.Date = fragment.CommitInfo.Date
  340. finding.Email = fragment.CommitInfo.AuthorEmail
  341. finding.Link = createScmLink(fragment.CommitInfo.Remote, finding)
  342. finding.Message = fragment.CommitInfo.Message
  343. }
  344. return append(findings, finding)
  345. }
  346. } else {
  347. // if path is set _and_ a regex is set, then we need to check both
  348. // so if the path does not match, then we should return early and not
  349. // consider the regex
  350. if !(r.Path.MatchString(fragment.FilePath) || (fragment.WindowsFilePath != "" && r.Path.MatchString(fragment.WindowsFilePath))) {
  351. return findings
  352. }
  353. }
  354. }
  355. // if path only rule, skip content checks
  356. if r.Regex == nil {
  357. return findings
  358. }
  359. // if flag configure and raw data size bigger then the flag
  360. if d.MaxTargetMegaBytes > 0 {
  361. rawLength := len(currentRaw) / 1_000_000
  362. if rawLength > d.MaxTargetMegaBytes {
  363. logger.Debug().
  364. Int("size", rawLength).
  365. Int("max-size", d.MaxTargetMegaBytes).
  366. Msg("skipping fragment: size")
  367. return findings
  368. }
  369. }
  370. matches := r.Regex.FindAllStringIndex(currentRaw, -1)
  371. if len(matches) == 0 {
  372. return findings
  373. }
  374. // TODO profile this, probably should replace with something more efficient
  375. newlineIndices := newLineRegexp.FindAllStringIndex(fragment.Raw, -1)
  376. // use currentRaw instead of fragment.Raw since this represents the current
  377. // decoding pass on the text
  378. for _, matchIndex := range r.Regex.FindAllStringIndex(currentRaw, -1) {
  379. // Extract secret from match
  380. secret := strings.Trim(currentRaw[matchIndex[0]:matchIndex[1]], "\n")
  381. // For any meta data from decoding
  382. var metaTags []string
  383. currentLine := ""
  384. // Check if the decoded portions of the segment overlap with the match
  385. // to see if its potentially a new match
  386. if len(encodedSegments) > 0 {
  387. segments := codec.SegmentsWithDecodedOverlap(encodedSegments, matchIndex[0], matchIndex[1])
  388. if len(segments) == 0 {
  389. // This item has already been added to a finding
  390. continue
  391. }
  392. matchIndex = codec.AdjustMatchIndex(segments, matchIndex)
  393. metaTags = append(metaTags, codec.Tags(segments)...)
  394. currentLine = codec.CurrentLine(segments, currentRaw)
  395. } else {
  396. // Fixes: https://github.com/gitleaks/gitleaks/issues/1352
  397. // removes the incorrectly following line that was detected by regex expression '\n'
  398. matchIndex[1] = matchIndex[0] + len(secret)
  399. }
  400. // determine location of match. Note that the location
  401. // in the finding will be the line/column numbers of the _match_
  402. // not the _secret_, which will be different if the secretGroup
  403. // value is set for this rule
  404. loc := location(newlineIndices, fragment.Raw, matchIndex)
  405. if matchIndex[1] > loc.endLineIndex {
  406. loc.endLineIndex = matchIndex[1]
  407. }
  408. finding := report.Finding{
  409. Commit: fragment.CommitSHA,
  410. RuleID: r.RuleID,
  411. Description: r.Description,
  412. StartLine: fragment.StartLine + loc.startLine,
  413. EndLine: fragment.StartLine + loc.endLine,
  414. StartColumn: loc.startColumn,
  415. EndColumn: loc.endColumn,
  416. Line: fragment.Raw[loc.startLineIndex:loc.endLineIndex],
  417. Match: secret,
  418. Secret: secret,
  419. File: fragment.FilePath,
  420. SymlinkFile: fragment.SymlinkFile,
  421. Tags: append(r.Tags, metaTags...),
  422. }
  423. if fragment.CommitInfo != nil {
  424. finding.Author = fragment.CommitInfo.AuthorName
  425. finding.Date = fragment.CommitInfo.Date
  426. finding.Email = fragment.CommitInfo.AuthorEmail
  427. finding.Link = createScmLink(fragment.CommitInfo.Remote, finding)
  428. finding.Message = fragment.CommitInfo.Message
  429. }
  430. if !d.IgnoreGitleaksAllow && strings.Contains(finding.Line, gitleaksAllowSignature) {
  431. logger.Trace().
  432. Str("finding", finding.Secret).
  433. Msg("skipping finding: 'gitleaks:allow' signature")
  434. continue
  435. }
  436. if currentLine == "" {
  437. currentLine = finding.Line
  438. }
  439. // Set the value of |secret|, if the pattern contains at least one capture group.
  440. // (The first element is the full match, hence we check >= 2.)
  441. groups := r.Regex.FindStringSubmatch(finding.Secret)
  442. if len(groups) >= 2 {
  443. if r.SecretGroup > 0 {
  444. if len(groups) <= r.SecretGroup {
  445. // Config validation should prevent this
  446. continue
  447. }
  448. finding.Secret = groups[r.SecretGroup]
  449. } else {
  450. // If |secretGroup| is not set, we will use the first suitable capture group.
  451. for _, s := range groups[1:] {
  452. if len(s) > 0 {
  453. finding.Secret = s
  454. break
  455. }
  456. }
  457. }
  458. }
  459. // check entropy
  460. entropy := shannonEntropy(finding.Secret)
  461. finding.Entropy = float32(entropy)
  462. if r.Entropy != 0.0 {
  463. // entropy is too low, skip this finding
  464. if entropy <= r.Entropy {
  465. logger.Trace().
  466. Str("finding", finding.Secret).
  467. Float32("entropy", finding.Entropy).
  468. Msg("skipping finding: low entropy")
  469. continue
  470. }
  471. }
  472. // check if the result matches any of the global allowlists.
  473. if isAllowed, event := checkFindingAllowed(logger, finding, fragment, currentLine, d.Config.Allowlists); isAllowed {
  474. event.Msg("skipping finding: global allowlist")
  475. continue
  476. }
  477. // check if the result matches any of the rule allowlists.
  478. if isAllowed, event := checkFindingAllowed(logger, finding, fragment, currentLine, r.Allowlists); isAllowed {
  479. event.Msg("skipping finding: rule allowlist")
  480. continue
  481. }
  482. if r.SmartFilter {
  483. if !d.passesSmartFilter(finding.Secret) {
  484. continue
  485. }
  486. }
  487. findings = append(findings, finding)
  488. }
  489. // Handle required rules (multi-part rules)
  490. if fragment.InheritedFromFinding || len(r.RequiredRules) == 0 {
  491. return findings
  492. }
  493. // Process required rules and create findings with auxiliary findings
  494. return d.processRequiredRules(fragment, currentRaw, r, encodedSegments, findings, logger)
  495. }
  496. // passesSmartFilter applies heuristics to determine if a string is likely a real looking secret
  497. // rather than random text or common words. It uses token density, character distribution,
  498. // and word analysis to filter out false positives. Returns true if the string passes
  499. // the filter (likely a secret), false if it should be skipped.
  500. func (d *Detector) passesSmartFilter(secret string) bool {
  501. tokens := d.tokenizer.Encode(secret, nil, nil)
  502. tokenLen := len(tokens)
  503. // token vals < 100
  504. numShortTokens := 0
  505. for _, t := range tokens {
  506. if t < 100 {
  507. numShortTokens++
  508. }
  509. }
  510. // token vals > 100
  511. // longTokens := tokenLen - numShortTokens
  512. density := len(secret) / tokenLen
  513. shortTokenRatio := float32(numShortTokens / tokenLen)
  514. result := d.nltkSearcher.Find(secret)
  515. fourPlusCharWords := len(result.Matches)
  516. // check if the secret has a close levenshtein distance to any of the results
  517. // if it does, consider this c4. normalize cases
  518. c4 := false
  519. secretLower := strings.ToLower(secret)
  520. for _, match := range result.Matches {
  521. // Only check against words with 5+ characters
  522. if len(match.Word) <= 5 {
  523. continue
  524. }
  525. wordLower := strings.ToLower(match.Word)
  526. distance := levenshtein.ComputeDistance(secretLower, wordLower)
  527. // Consider it close if distance is <= 2 or <= 20% of the longer string length
  528. maxLen := max(len(secretLower), len(wordLower))
  529. threshold := max(2, maxLen/5)
  530. if distance <= threshold {
  531. c4 = true
  532. break
  533. }
  534. // OR check if the match is 6 characters or more _and_ is a substring of the
  535. // potential secret
  536. if len(match.Word) <= 6 {
  537. continue
  538. }
  539. if strings.Contains(strings.ToLower(secret), strings.ToLower(match.Word)) {
  540. c4 = true
  541. }
  542. }
  543. c1 := density <= 2.0
  544. c2 := shortTokenRatio >= 0.25
  545. c3 := len(secret) >= 9 && float32(density) <= 2.5
  546. likelySecret := c1 || c2 || c3
  547. // Check for irregularly cased English words
  548. // majorityIrregularlyCased := false
  549. hasIrregularCasing := false
  550. for _, word := range result.UniqueWords {
  551. if isIrregularlyCased(word, secret) {
  552. hasIrregularCasing = true
  553. break
  554. }
  555. }
  556. // if len(result.UniqueWords) > 0 && irregularlyCased > len(result.UniqueWords)/2 {
  557. // majorityIrregularlyCased = true
  558. // }
  559. // fmt.Println(hasIrregularCasing, fourPlusCharWords, secret)
  560. // fmt.Println(c1, c2, c3, fourPlusCharWords)
  561. // Make exception if words have irregular casing
  562. if hasIrregularCasing && fourPlusCharWords > 1 {
  563. pass := likelySecret
  564. if !pass {
  565. // fmt.Println("skipping: ", secret, "--", c1, c2, c3, fourPlusCharWords, result.UniqueWords, "(irregular casing exception)")
  566. }
  567. return pass
  568. }
  569. pass := likelySecret && !(fourPlusCharWords > 1) && !c4
  570. if !pass {
  571. // fmt.Println("skipping: ", secret, "--", c1, c2, c3, fourPlusCharWords, result.UniqueWords)
  572. }
  573. return pass
  574. }
// processRequiredRules handles the logic for multi-part rules with auxiliary findings.
//
// Each required rule is scanned exactly once against the fragment (with
// InheritedFromFinding set to prevent recursion), then every primary finding
// is kept only if, within the configured proximity, at least one auxiliary
// finding exists for every required rule.
func (d *Detector) processRequiredRules(fragment Fragment, currentRaw string, r config.Rule, encodedSegments []*codec.EncodedSegment, primaryFindings []report.Finding, logger zerolog.Logger) []report.Finding {
	if len(primaryFindings) == 0 {
		logger.Debug().Msg("no primary findings to process for required rules")
		return primaryFindings
	}

	// Pre-collect all required rule findings once
	allRequiredFindings := make(map[string][]report.Finding)
	for _, requiredRule := range r.RequiredRules {
		rule, ok := d.Config.Rules[requiredRule.RuleID]
		if !ok {
			logger.Error().Str("rule-id", requiredRule.RuleID).Msg("required rule not found in config")
			continue
		}

		// Mark fragment as inherited to prevent infinite recursion
		inheritedFragment := fragment
		inheritedFragment.InheritedFromFinding = true

		// Call detectRule once for each required rule
		requiredFindings := d.detectRule(inheritedFragment, currentRaw, rule, encodedSegments)
		allRequiredFindings[requiredRule.RuleID] = requiredFindings

		logger.Debug().
			Str("rule-id", requiredRule.RuleID).
			Int("findings", len(requiredFindings)).
			Msg("collected required rule findings")
	}

	var finalFindings []report.Finding
	// Now process each primary finding against the pre-collected required findings
	for _, primaryFinding := range primaryFindings {
		// Auxiliary findings (across all required rules) close enough to this
		// primary finding.
		var requiredFindings []*report.RequiredFinding
		for _, requiredRule := range r.RequiredRules {
			foundRequiredFindings, exists := allRequiredFindings[requiredRule.RuleID]
			if !exists {
				continue // Rule wasn't found earlier, skip
			}

			// Filter findings that are within proximity of the primary finding
			for _, requiredFinding := range foundRequiredFindings {
				if d.withinProximity(primaryFinding, requiredFinding, requiredRule) {
					req := &report.RequiredFinding{
						RuleID:      requiredFinding.RuleID,
						StartLine:   requiredFinding.StartLine,
						EndLine:     requiredFinding.EndLine,
						StartColumn: requiredFinding.StartColumn,
						EndColumn:   requiredFinding.EndColumn,
						Line:        requiredFinding.Line,
						Match:       requiredFinding.Match,
						Secret:      requiredFinding.Secret,
					}
					requiredFindings = append(requiredFindings, req)
				}
			}
		}

		// Check if we have at least one auxiliary finding for each required rule
		if len(requiredFindings) > 0 && d.hasAllRequiredRules(requiredFindings, r.RequiredRules) {
			// Create a finding with auxiliary findings
			newFinding := primaryFinding // Copy the primary finding
			newFinding.AddRequiredFindings(requiredFindings)
			finalFindings = append(finalFindings, newFinding)

			logger.Debug().
				Str("primary-rule", r.RuleID).
				Int("primary-line", primaryFinding.StartLine).
				Int("auxiliary-count", len(requiredFindings)).
				Msg("multi-part rule satisfied")
		}
	}
	return finalFindings
}
  641. // hasAllRequiredRules checks if we have at least one auxiliary finding for each required rule
  642. func (d *Detector) hasAllRequiredRules(auxiliaryFindings []*report.RequiredFinding, requiredRules []*config.Required) bool {
  643. foundRules := make(map[string]bool)
  644. // AuxiliaryFinding
  645. for _, aux := range auxiliaryFindings {
  646. foundRules[aux.RuleID] = true
  647. }
  648. for _, required := range requiredRules {
  649. if !foundRules[required.RuleID] {
  650. return false
  651. }
  652. }
  653. return true
  654. }
  655. func (d *Detector) withinProximity(primary, required report.Finding, requiredRule *config.Required) bool {
  656. // fmt.Println(requiredRule.WithinLines)
  657. // If neither within_lines nor within_columns is set, findings just need to be in the same fragment
  658. if requiredRule.WithinLines == nil && requiredRule.WithinColumns == nil {
  659. return true
  660. }
  661. // Check line proximity (vertical distance)
  662. if requiredRule.WithinLines != nil {
  663. lineDiff := abs(primary.StartLine - required.StartLine)
  664. if lineDiff > *requiredRule.WithinLines {
  665. return false
  666. }
  667. }
  668. // Check column proximity (horizontal distance)
  669. if requiredRule.WithinColumns != nil {
  670. // Use the start column of each finding for proximity calculation
  671. colDiff := abs(primary.StartColumn - required.StartColumn)
  672. if colDiff > *requiredRule.WithinColumns {
  673. return false
  674. }
  675. }
  676. return true
  677. }
  678. // abs returns the absolute value of an integer
  679. func abs(x int) int {
  680. if x < 0 {
  681. return -x
  682. }
  683. return x
  684. }
  685. // AddFinding synchronously adds a finding to the findings slice
  686. func (d *Detector) AddFinding(finding report.Finding) {
  687. globalFingerprint := fmt.Sprintf("%s:%s:%d", finding.File, finding.RuleID, finding.StartLine)
  688. if finding.Commit != "" {
  689. finding.Fingerprint = fmt.Sprintf("%s:%s:%s:%d", finding.Commit, finding.File, finding.RuleID, finding.StartLine)
  690. } else {
  691. finding.Fingerprint = globalFingerprint
  692. }
  693. // check if we should ignore this finding
  694. logger := logging.With().Str("finding", finding.Secret).Logger()
  695. if _, ok := d.gitleaksIgnore[globalFingerprint]; ok {
  696. logger.Debug().
  697. Str("fingerprint", globalFingerprint).
  698. Msg("skipping finding: global fingerprint")
  699. return
  700. } else if finding.Commit != "" {
  701. // Awkward nested if because I'm not sure how to chain these two conditions.
  702. if _, ok := d.gitleaksIgnore[finding.Fingerprint]; ok {
  703. logger.Debug().
  704. Str("fingerprint", finding.Fingerprint).
  705. Msgf("skipping finding: fingerprint")
  706. return
  707. }
  708. }
  709. if d.baseline != nil && !IsNew(finding, d.Redact, d.baseline) {
  710. logger.Debug().
  711. Str("fingerprint", finding.Fingerprint).
  712. Msgf("skipping finding: baseline")
  713. return
  714. }
  715. d.findingMutex.Lock()
  716. d.findings = append(d.findings, finding)
  717. if d.Verbose {
  718. printFinding(finding, d.NoColor)
  719. }
  720. d.findingMutex.Unlock()
  721. }
// Findings returns the findings added to the detector.
//
// NOTE(review): this returns the internal slice directly (no copy) and
// does not take findingMutex — presumably callers only read it after
// detection has finished; confirm before calling concurrently with
// AddFinding.
func (d *Detector) Findings() []report.Finding {
	return d.findings
}
  726. // AddCommit synchronously adds a commit to the commit slice
  727. func (d *Detector) addCommit(commit string) {
  728. d.commitMutex.Lock()
  729. d.commitMap[commit] = true
  730. d.commitMutex.Unlock()
  731. }
  732. // checkCommitOrPathAllowed evaluates |fragment| against all provided |allowlists|.
  733. //
  734. // If the match condition is "OR", only commit and path are checked.
  735. // Otherwise, if regexes or stopwords are defined this will fail.
  736. func checkCommitOrPathAllowed(
  737. logger zerolog.Logger,
  738. fragment Fragment,
  739. allowlists []*config.Allowlist,
  740. ) (bool, *zerolog.Event) {
  741. if fragment.FilePath == "" && fragment.CommitSHA == "" {
  742. return false, nil
  743. }
  744. for _, a := range allowlists {
  745. var (
  746. isAllowed bool
  747. allowlistChecks []bool
  748. commitAllowed, _ = a.CommitAllowed(fragment.CommitSHA)
  749. pathAllowed = a.PathAllowed(fragment.FilePath) || (fragment.WindowsFilePath != "" && a.PathAllowed(fragment.WindowsFilePath))
  750. )
  751. // If the condition is "AND" we need to check all conditions.
  752. if a.MatchCondition == config.AllowlistMatchAnd {
  753. if len(a.Commits) > 0 {
  754. allowlistChecks = append(allowlistChecks, commitAllowed)
  755. }
  756. if len(a.Paths) > 0 {
  757. allowlistChecks = append(allowlistChecks, pathAllowed)
  758. }
  759. // These will be checked later.
  760. if len(a.Regexes) > 0 {
  761. continue
  762. }
  763. if len(a.StopWords) > 0 {
  764. continue
  765. }
  766. isAllowed = allTrue(allowlistChecks)
  767. } else {
  768. isAllowed = commitAllowed || pathAllowed
  769. }
  770. if isAllowed {
  771. event := logger.Trace().Str("condition", a.MatchCondition.String())
  772. if commitAllowed {
  773. event.Bool("allowed-commit", commitAllowed)
  774. }
  775. if pathAllowed {
  776. event.Bool("allowed-path", pathAllowed)
  777. }
  778. return true, event
  779. }
  780. }
  781. return false, nil
  782. }
// checkFindingAllowed evaluates |finding| against all provided |allowlists|.
//
// If the match condition is "OR", only regex and stopwords are run. (Commit and path should be handled separately).
// Otherwise, all conditions are checked.
//
// TODO: The method signature is awkward. I can't think of a better way to log helpful info.
func checkFindingAllowed(
	logger zerolog.Logger,
	finding report.Finding,
	fragment Fragment,
	currentLine string,
	allowlists []*config.Allowlist,
) (bool, *zerolog.Event) {
	for _, a := range allowlists {
		// Pick the text the allowlist regexes run against: the secret
		// (default), the full rule match, or the entire line.
		allowlistTarget := finding.Secret
		switch a.RegexTarget {
		case "match":
			allowlistTarget = finding.Match
		case "line":
			allowlistTarget = currentLine
		}
		var (
			checks []bool
			isAllowed bool
			commitAllowed bool
			commit string
			pathAllowed bool
			// Regex/stopword checks are computed up front for every
			// allowlist; commit/path checks are only computed when needed
			// in the "AND" branch below.
			regexAllowed = a.RegexAllowed(allowlistTarget)
			containsStopword, word = a.ContainsStopWord(finding.Secret)
		)
		// If the condition is "AND" we need to check all conditions.
		if a.MatchCondition == config.AllowlistMatchAnd {
			// Determine applicable checks: only conditions the allowlist
			// actually defines participate in the conjunction.
			if len(a.Commits) > 0 {
				commitAllowed, commit = a.CommitAllowed(fragment.CommitSHA)
				checks = append(checks, commitAllowed)
			}
			if len(a.Paths) > 0 {
				pathAllowed = a.PathAllowed(fragment.FilePath) || (fragment.WindowsFilePath != "" && a.PathAllowed(fragment.WindowsFilePath))
				checks = append(checks, pathAllowed)
			}
			if len(a.Regexes) > 0 {
				checks = append(checks, regexAllowed)
			}
			if len(a.StopWords) > 0 {
				checks = append(checks, containsStopword)
			}
			isAllowed = allTrue(checks)
		} else {
			// "OR": commit/path were already handled by
			// checkCommitOrPathAllowed, so only regex/stopword apply here.
			isAllowed = regexAllowed || containsStopword
		}
		if isAllowed {
			// Build a trace event recording which condition(s) matched;
			// the caller decides whether/how to emit it.
			event := logger.Trace().
				Str("finding", finding.Secret).
				Str("condition", a.MatchCondition.String())
			if commitAllowed {
				event.Str("allowed-commit", commit)
			}
			if pathAllowed {
				event.Bool("allowed-path", pathAllowed)
			}
			if regexAllowed {
				event.Bool("allowed-regex", regexAllowed)
			}
			if containsStopword {
				event.Str("allowed-stopword", word)
			}
			return true, event
		}
	}
	return false, nil
}
  855. func allTrue(bools []bool) bool {
  856. for _, check := range bools {
  857. if !check {
  858. return false
  859. }
  860. }
  861. return true
  862. }