detect.go

package detect

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"sync"

	"github.com/zricethezav/gitleaks/v8/config"
	"github.com/zricethezav/gitleaks/v8/detect/git"
	"github.com/zricethezav/gitleaks/v8/report"

	"github.com/fatih/semgroup"
	"github.com/gitleaks/go-gitdiff/gitdiff"
	"github.com/h2non/filetype"
	"github.com/rs/zerolog/log"
	"github.com/spf13/viper"
)

// Type used to differentiate between git scan types:
// $ gitleaks detect
// $ gitleaks protect
// $ gitleaks protect staged
type GitScanType int

const (
	DetectType GitScanType = iota
	ProtectType
	ProtectStagedType

	gitleaksAllowSignature = "gitleaks:allow"
)

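// Example (added note, not part of the upstream source): gitleaksAllowSignature is
// an inline allow marker. A match is dropped when the matched line of the scanned
// content contains that marker, e.g. in a scanned file:
//
//	api_token = "EXAMPLE-NOT-A-REAL-SECRET" // gitleaks:allow
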
// Detector is the main detector struct
type Detector struct {
	// Config is the configuration for the detector
	Config config.Config

	// Redact is a flag to redact findings. This is exported
	// so users using gitleaks as a library can set this flag
	// without calling `detector.Start(cmd *cobra.Command)`
	Redact bool

	// Verbose is a flag to print findings
	Verbose bool

	// commitMap is used to keep track of commits that have been scanned.
	// This is only used for logging purposes and git scans.
	commitMap map[string]bool

	// findingMutex is to prevent concurrent access to the
	// findings slice when adding findings.
	findingMutex *sync.Mutex

	// findings is a slice of report.Findings. This is the result
	// of the detector's scan which can then be used to generate a
	// report.
	findings []report.Finding
}

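// Example (added sketch, not part of the upstream source): library consumers can
// set the exported flags directly after constructing a detector, as the Redact
// comment above suggests. Error handling is abbreviated.
//
//	detector, err := NewDetectorDefaultConfig()
//	if err != nil {
//		// handle error loading the default config
//	}
//	detector.Redact = true  // redact secrets in reported findings
//	detector.Verbose = true // print findings as they are added
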
// Fragment contains the data to be scanned
type Fragment struct {
	// Raw is the raw content of the fragment
	Raw string

	// FilePath is the path to the file if applicable
	FilePath string

	// CommitSHA is the SHA of the commit if applicable
	CommitSHA string

	// newlineIndices is a list of indices of newlines in the raw content.
	// This is used to calculate the line location of a finding
	newlineIndices [][]int
}

// NewDetector creates a new detector with the given config
func NewDetector(cfg config.Config) *Detector {
	return &Detector{
		commitMap:    make(map[string]bool),
		findingMutex: &sync.Mutex{},
		findings:     make([]report.Finding, 0),
		Config:       cfg,
	}
}

// NewDetectorDefaultConfig creates a new detector with the default config
func NewDetectorDefaultConfig() (*Detector, error) {
	viper.SetConfigType("toml")
	err := viper.ReadConfig(strings.NewReader(config.DefaultConfig))
	if err != nil {
		return nil, err
	}
	var vc config.ViperConfig
	err = viper.Unmarshal(&vc)
	if err != nil {
		return nil, err
	}
	cfg, err := vc.Translate()
	if err != nil {
		return nil, err
	}
	return NewDetector(cfg), nil
}

// DetectBytes scans the given bytes and returns a list of findings
func (d *Detector) DetectBytes(content []byte) []report.Finding {
	return d.DetectString(string(content))
}

// DetectString scans the given string and returns a list of findings
func (d *Detector) DetectString(content string) []report.Finding {
	return d.Detect(Fragment{
		Raw: content,
	})
}

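// Example (added sketch, not part of the upstream source): scanning an in-memory
// string with the default config. The key name and value below are illustrative
// placeholders; error handling is abbreviated.
//
//	detector, err := NewDetectorDefaultConfig()
//	if err != nil {
//		// handle error loading the default config
//	}
//	findings := detector.DetectString(`api_key = "EXAMPLE-NOT-A-REAL-SECRET"`)
//	for _, f := range findings {
//		fmt.Println(f.RuleID, f.StartLine, f.Secret)
//	}
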
// detectRule scans the given fragment for the given rule and returns a list of findings
func (d *Detector) detectRule(fragment Fragment, rule *config.Rule) []report.Finding {
	var findings []report.Finding

	// check if filepath or commit is allowed for this rule
	if rule.Allowlist.CommitAllowed(fragment.CommitSHA) ||
		rule.Allowlist.PathAllowed(fragment.FilePath) {
		return findings
	}

	if rule.Path != nil && rule.Regex == nil {
		// Path _only_ rule
		if rule.Path.Match([]byte(fragment.FilePath)) {
			finding := report.Finding{
				Description: rule.Description,
				File:        fragment.FilePath,
				RuleID:      rule.RuleID,
				Match:       fmt.Sprintf("file detected: %s", fragment.FilePath),
				Tags:        rule.Tags,
			}
			return append(findings, finding)
		}
	} else if rule.Path != nil {
		// if path is set _and_ a regex is set, then we need to check both
		// so if the path does not match, then we should return early and not
		// consider the regex
		if !rule.Path.Match([]byte(fragment.FilePath)) {
			return findings
		}
	}

	// if path only rule, skip content checks
	if rule.Regex == nil {
		return findings
	}

	containsKeyword := false
	for _, k := range rule.Keywords {
		if strings.Contains(strings.ToLower(fragment.Raw),
			strings.ToLower(k)) {
			containsKeyword = true
			break
		}
	}
	if !containsKeyword && len(rule.Keywords) != 0 {
		return findings
	}

	matchIndices := rule.Regex.FindAllStringIndex(fragment.Raw, -1)
	for _, matchIndex := range matchIndices {
		// extract secret from match
		secret := strings.Trim(fragment.Raw[matchIndex[0]:matchIndex[1]], "\n")

		// determine location of match. Note that the location
		// in the finding will be the line/column numbers of the _match_
		// not the _secret_, which will be different if the secretGroup
		// value is set for this rule
		loc := location(fragment, matchIndex)

		finding := report.Finding{
			Description: rule.Description,
			File:        fragment.FilePath,
			RuleID:      rule.RuleID,
			StartLine:   loc.startLine,
			EndLine:     loc.endLine,
			StartColumn: loc.startColumn,
			EndColumn:   loc.endColumn,
			Secret:      secret,
			Match:       secret,
			Tags:        rule.Tags,
		}

		if strings.Contains(fragment.Raw[loc.startLineIndex:loc.endLineIndex],
			gitleaksAllowSignature) {
			continue
		}

		// extract secret from secret group if set
		if rule.SecretGroup != 0 {
			groups := rule.Regex.FindStringSubmatch(secret)
			if len(groups) <= rule.SecretGroup || len(groups) == 0 {
				// Config validation should prevent this
				continue
			}
			secret = groups[rule.SecretGroup]
			finding.Secret = secret
		}

		// check if the secret is in the allowlist
		if rule.Allowlist.RegexAllowed(finding.Secret) ||
			d.Config.Allowlist.RegexAllowed(finding.Secret) {
			continue
		}

		// check entropy
		entropy := shannonEntropy(finding.Secret)
		finding.Entropy = float32(entropy)
		if rule.Entropy != 0.0 {
			if entropy <= rule.Entropy {
				// entropy is too low, skip this finding
				continue
			}
			// NOTE: this is a goofy hack to get around the fact that Go's regex engine
			// does not support positive lookaheads. Ideally we would want to add a
			// restriction on generic rules' regex that requires the secret match group
			// to contain both numbers and alphabetical characters, not just alphabetical
			// characters. What this bit of code does is check if the rule ID is prefixed
			// with "generic" and, if so, enforce that the secret contains digits as well
			// as alphabetical characters.
			// TODO: this should be replaced with stop words
			if strings.HasPrefix(rule.RuleID, "generic") {
				if !containsDigit(secret) {
					continue
				}
			}
		}

		findings = append(findings, finding)
	}
	return findings
}

// DetectGit scans the git history of the given source repository and returns a
// list of findings. Depending on the GitScanType, the history is generated from
// the output of `git log -p ...` (DetectType) or `git diff` (ProtectType and
// ProtectStagedType), and each file (patch) in that history is checked for findings.
func (d *Detector) DetectGit(source string, logOpts string, gitScanType GitScanType) ([]report.Finding, error) {
	var (
		gitdiffFiles <-chan *gitdiff.File
		err          error
	)
	switch gitScanType {
	case DetectType:
		gitdiffFiles, err = git.GitLog(source, logOpts)
		if err != nil {
			return d.findings, err
		}
	case ProtectType:
		gitdiffFiles, err = git.GitDiff(source, false)
		if err != nil {
			return d.findings, err
		}
	case ProtectStagedType:
		gitdiffFiles, err = git.GitDiff(source, true)
		if err != nil {
			return d.findings, err
		}
	}

	s := semgroup.NewGroup(context.Background(), 4)
	for gitdiffFile := range gitdiffFiles {
		gitdiffFile := gitdiffFile

		// skip binary files
		if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
			continue
		}

		// Check if commit is allowed
		commitSHA := ""
		if gitdiffFile.PatchHeader != nil {
			commitSHA = gitdiffFile.PatchHeader.SHA
			if d.Config.Allowlist.CommitAllowed(gitdiffFile.PatchHeader.SHA) {
				continue
			}
		}
		d.addCommit(commitSHA)

		s.Go(func() error {
			for _, textFragment := range gitdiffFile.TextFragments {
				if textFragment == nil {
					return nil
				}
				fragment := Fragment{
					Raw:       textFragment.Raw(gitdiff.OpAdd),
					CommitSHA: commitSHA,
					FilePath:  gitdiffFile.NewName,
				}
				for _, finding := range d.Detect(fragment) {
					d.addFinding(augmentGitFinding(finding, textFragment, gitdiffFile))
				}
			}
			return nil
		})
	}

	if err := s.Wait(); err != nil {
		return d.findings, err
	}
	log.Debug().Msgf("%d commits scanned. Note: this number might be smaller than expected due to commits with no additions", len(d.commitMap))
	return d.findings, nil
}

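// Example (added sketch, not part of the upstream source): scanning the full git
// history of a local repository with an empty logOpts string. The repository path
// is a placeholder and error handling is abbreviated.
//
//	detector, err := NewDetectorDefaultConfig()
//	if err != nil {
//		// handle error loading the default config
//	}
//	findings, err := detector.DetectGit("/path/to/repo", "", DetectType)
//	if err != nil {
//		// handle git scan error
//	}
//	log.Info().Msgf("found %d leaks", len(findings))
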
// DetectFiles accepts a path to a source directory or file and begins a scan of the
// file or directory.
func (d *Detector) DetectFiles(source string) ([]report.Finding, error) {
	s := semgroup.NewGroup(context.Background(), 4)
	paths := make(chan string)
	s.Go(func() error {
		defer close(paths)
		return filepath.Walk(source,
			func(path string, fInfo os.FileInfo, err error) error {
				if err != nil {
					return err
				}
				if fInfo.Name() == ".git" {
					return filepath.SkipDir
				}
				if fInfo.Mode().IsRegular() {
					paths <- path
				}
				return nil
			})
	})

	for pa := range paths {
		p := pa
		s.Go(func() error {
			b, err := os.ReadFile(p)
			if err != nil {
				return err
			}
			mimetype, err := filetype.Match(b)
			if err != nil {
				return err
			}
			if mimetype.MIME.Type == "application" {
				return nil // skip binary files
			}

			fragment := Fragment{
				Raw:      string(b),
				FilePath: p,
			}
			for _, finding := range d.Detect(fragment) {
				// need to add 1 since line counting starts at 1
				finding.EndLine++
				finding.StartLine++
				d.addFinding(finding)
			}
			return nil
		})
	}

	if err := s.Wait(); err != nil {
		return d.findings, err
	}
	return d.findings, nil
}

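// Example (added sketch, not part of the upstream source): scanning a directory on
// disk. The directory path is a placeholder and error handling is abbreviated.
//
//	detector, err := NewDetectorDefaultConfig()
//	if err != nil {
//		// handle error loading the default config
//	}
//	findings, err := detector.DetectFiles("/path/to/source")
//	if err != nil {
//		// handle filesystem scan error
//	}
//	for _, f := range findings {
//		fmt.Printf("%s: %s (line %d)\n", f.File, f.RuleID, f.StartLine)
//	}
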
// Detect scans the given fragment and returns a list of findings
func (d *Detector) Detect(fragment Fragment) []report.Finding {
	var findings []report.Finding

	// check if filepath is allowed
	if fragment.FilePath != "" && (d.Config.Allowlist.PathAllowed(fragment.FilePath) ||
		fragment.FilePath == d.Config.Path) {
		return findings
	}

	// add newline indices for location calculation in detectRule
	fragment.newlineIndices = regexp.MustCompile("\n").FindAllStringIndex(fragment.Raw, -1)

	for _, rule := range d.Config.Rules {
		findings = append(findings, d.detectRule(fragment, rule)...)
	}
	return filter(findings, d.Redact)
}

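// Example (added sketch, not part of the upstream source): scanning a Fragment
// directly when the caller already has file contents in memory and wants the
// path-based rules and allowlists to apply. The path and contents are placeholders.
//
//	detector, err := NewDetectorDefaultConfig()
//	if err != nil {
//		// handle error loading the default config
//	}
//	findings := detector.Detect(Fragment{
//		Raw:      `token = "EXAMPLE-NOT-A-REAL-SECRET"`,
//		FilePath: "config/settings.env",
//	})
//	for _, f := range findings {
//		fmt.Println(f.RuleID, f.File, f.StartLine)
//	}
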
// addFinding synchronously adds a finding to the findings slice
func (d *Detector) addFinding(finding report.Finding) {
	d.findingMutex.Lock()
	d.findings = append(d.findings, finding)
	if d.Verbose {
		printFinding(finding)
	}
	d.findingMutex.Unlock()
}

// addCommit adds a commit to the commit map used to track scanned commits
func (d *Detector) addCommit(commit string) {
	d.commitMap[commit] = true
}