4
0

detect.go 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. package detect
  2. import (
  3. "context"
  4. "fmt"
  5. "os"
  6. "path/filepath"
  7. "regexp"
  8. "strings"
  9. "sync"
  10. "github.com/zricethezav/gitleaks/v8/config"
  11. "github.com/zricethezav/gitleaks/v8/detect/git"
  12. "github.com/zricethezav/gitleaks/v8/report"
  13. "github.com/fatih/semgroup"
  14. "github.com/gitleaks/go-gitdiff/gitdiff"
  15. "github.com/rs/zerolog/log"
  16. "github.com/spf13/viper"
  17. )
  // GitScanType selects which git-based scan DetectGit performs. The modes
  // correspond to these invocations:
  //
  //	$ gitleaks detect
  //	$ gitleaks protect
  //	$ gitleaks protect staged
  type GitScanType int

  // Supported git scan modes, numbered from zero in declaration order.
  const (
  	// DetectType makes DetectGit read its patches from git.GitLog.
  	DetectType GitScanType = iota
  	// ProtectType makes DetectGit read its patches from git.GitDiff with the
  	// staged flag off.
  	ProtectType
  	// ProtectStagedType makes DetectGit read its patches from git.GitDiff with
  	// the staged flag on.
  	ProtectStagedType
  )
  28. // Detector is the main detector struct
  29. type Detector struct {
  30. // Config is the configuration for the detector
  31. Config config.Config
  32. // Redact is a flag to redact findings. This is exported
  33. // so users using gitleaks as a library can set this flag
  34. // without calling `detector.Start(cmd *cobra.Command)`
  35. Redact bool
  36. // verbose is a flag to print findings
  37. Verbose bool
  38. // commitMap is used to keep track of commits that have been scanned.
  39. // This is only used for logging purposes and git scans.
  40. commitMap map[string]bool
  41. // findingMutex is to prevent concurrent access to the
  42. // findings slice when adding findings.
  43. findingMutex *sync.Mutex
  44. // findings is a slice of report.Findings. This is the result
  45. // of the detector's scan which can then be used to generate a
  46. // report.
  47. findings []report.Finding
  48. }
  // Fragment is one unit of content handed to the detector, together with the
  // metadata needed to attribute findings to it.
  type Fragment struct {
  	// Raw is the text to scan.
  	Raw string

  	// FilePath is the path of the file the text came from, when there is one.
  	FilePath string

  	// CommitSHA is the SHA of the commit the text came from, when there is one.
  	CommitSHA string

  	// newlineIndices holds the index pair of every newline found in Raw.
  	// Detect fills it in before any rule runs; it is used to work out the
  	// line location of a finding.
  	newlineIndices [][]int
  }
  61. // NewDetector creates a new detector with the given config
  62. func NewDetector(cfg config.Config) *Detector {
  63. return &Detector{
  64. commitMap: make(map[string]bool),
  65. findingMutex: &sync.Mutex{},
  66. findings: make([]report.Finding, 0),
  67. Config: cfg,
  68. }
  69. }
  70. // NewDetectorDefaultConfig creates a new detector with the default config
  71. func NewDetectorDefaultConfig() (*Detector, error) {
  72. viper.SetConfigType("toml")
  73. err := viper.ReadConfig(strings.NewReader(config.DefaultConfig))
  74. if err != nil {
  75. return nil, err
  76. }
  77. var vc config.ViperConfig
  78. err = viper.Unmarshal(&vc)
  79. if err != nil {
  80. return nil, err
  81. }
  82. cfg, err := vc.Translate()
  83. if err != nil {
  84. return nil, err
  85. }
  86. return NewDetector(cfg), nil
  87. }
  88. // DetectBytes scans the given bytes and returns a list of findings
  89. func (d *Detector) DetectBytes(content []byte) []report.Finding {
  90. return d.DetectString(string(content))
  91. }
  92. // DetectString scans the given string and returns a list of findings
  93. func (d *Detector) DetectString(content string) []report.Finding {
  94. return d.Detect(Fragment{
  95. Raw: content,
  96. })
  97. }
  98. // detectRule scans the given fragment for the given rule and returns a list of findings
  99. func (d *Detector) detectRule(fragment Fragment, rule *config.Rule) []report.Finding {
  100. var findings []report.Finding
  101. // check if filepath or commit is allowed for this rule
  102. if rule.Allowlist.CommitAllowed(fragment.CommitSHA) ||
  103. rule.Allowlist.PathAllowed(fragment.FilePath) {
  104. return findings
  105. }
  106. if rule.Path != nil && rule.Regex == nil {
  107. // Path _only_ rule
  108. if rule.Path.Match([]byte(fragment.FilePath)) {
  109. finding := report.Finding{
  110. Description: rule.Description,
  111. File: fragment.FilePath,
  112. RuleID: rule.RuleID,
  113. Match: fmt.Sprintf("file detected: %s", fragment.FilePath),
  114. Tags: rule.Tags,
  115. }
  116. return append(findings, finding)
  117. }
  118. } else if rule.Path != nil {
  119. // if path is set _and_ a regex is set, then we need to check both
  120. // so if the path does not match, then we should return early and not
  121. // consider the regex
  122. if !rule.Path.Match([]byte(fragment.FilePath)) {
  123. return findings
  124. }
  125. }
  126. matchIndices := rule.Regex.FindAllStringIndex(fragment.Raw, -1)
  127. for _, matchIndex := range matchIndices {
  128. // extract secret from match
  129. secret := strings.Trim(fragment.Raw[matchIndex[0]:matchIndex[1]], "\n")
  130. // determine location of match. Note that the location
  131. // in the finding will be the line/column numbers of the _match_
  132. // not the _secret_, which will be different if the secretGroup
  133. // value is set for this rule
  134. loc := location(fragment, matchIndex)
  135. finding := report.Finding{
  136. Description: rule.Description,
  137. File: fragment.FilePath,
  138. RuleID: rule.RuleID,
  139. StartLine: loc.startLine,
  140. EndLine: loc.endLine,
  141. StartColumn: loc.startColumn,
  142. EndColumn: loc.endColumn,
  143. Secret: secret,
  144. Match: secret,
  145. Tags: rule.Tags,
  146. }
  147. // extract secret from secret group if set
  148. if rule.SecretGroup != 0 {
  149. groups := rule.Regex.FindStringSubmatch(secret)
  150. if len(groups) <= rule.SecretGroup || len(groups) == 0 {
  151. // Config validation should prevent this
  152. continue
  153. }
  154. secret = groups[rule.SecretGroup]
  155. finding.Secret = secret
  156. }
  157. // check if the secret is in the allowlist
  158. if rule.Allowlist.RegexAllowed(finding.Secret) ||
  159. d.Config.Allowlist.RegexAllowed(finding.Secret) {
  160. continue
  161. }
  162. // check entropy
  163. entropy := shannonEntropy(finding.Secret)
  164. finding.Entropy = float32(entropy)
  165. if rule.Entropy != 0.0 {
  166. if entropy <= rule.Entropy {
  167. // entropy is too low, skip this finding
  168. continue
  169. }
  170. // NOTE: this is a goofy hack to get around the fact there golang's regex engine
  171. // does not support positive lookaheads. Ideally we would want to add a
  172. // restriction on generic rules regex that requires the secret match group
  173. // contains both numbers and alphabetical characters, not just alphabetical characters.
  174. // What this bit of code does is check if the ruleid is prepended with "generic" and enforces the
  175. // secret contains both digits and alphabetical characters.
  176. // TODO: this should be replaced with stop words
  177. if strings.HasPrefix(rule.RuleID, "generic") {
  178. if !containsDigit(secret) {
  179. continue
  180. }
  181. }
  182. }
  183. findings = append(findings, finding)
  184. }
  185. return findings
  186. }
  187. // GitScan accepts a *gitdiff.File channel which contents a git history generated from
  188. // the output of `git log -p ...`. startGitScan will look at each file (patch) in the history
  189. // and determine if the patch contains any findings.
  190. func (d *Detector) DetectGit(source string, logOpts string, gitScanType GitScanType) ([]report.Finding, error) {
  191. var (
  192. gitdiffFiles <-chan *gitdiff.File
  193. err error
  194. )
  195. switch gitScanType {
  196. case DetectType:
  197. gitdiffFiles, err = git.GitLog(source, logOpts)
  198. if err != nil {
  199. return d.findings, err
  200. }
  201. case ProtectType:
  202. gitdiffFiles, err = git.GitDiff(source, false)
  203. if err != nil {
  204. return d.findings, err
  205. }
  206. case ProtectStagedType:
  207. gitdiffFiles, err = git.GitDiff(source, true)
  208. if err != nil {
  209. return d.findings, err
  210. }
  211. }
  212. s := semgroup.NewGroup(context.Background(), 4)
  213. for gitdiffFile := range gitdiffFiles {
  214. gitdiffFile := gitdiffFile
  215. // skip binary files
  216. if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
  217. continue
  218. }
  219. // Check if commit is allowed
  220. commitSHA := ""
  221. if gitdiffFile.PatchHeader != nil {
  222. commitSHA = gitdiffFile.PatchHeader.SHA
  223. if d.Config.Allowlist.CommitAllowed(gitdiffFile.PatchHeader.SHA) {
  224. continue
  225. }
  226. }
  227. d.addCommit(commitSHA)
  228. s.Go(func() error {
  229. for _, textFragment := range gitdiffFile.TextFragments {
  230. if textFragment == nil {
  231. return nil
  232. }
  233. fragment := Fragment{
  234. Raw: textFragment.Raw(gitdiff.OpAdd),
  235. CommitSHA: commitSHA,
  236. FilePath: gitdiffFile.NewName,
  237. }
  238. for _, finding := range d.Detect(fragment) {
  239. d.addFinding(augmentGitFinding(finding, textFragment, gitdiffFile))
  240. }
  241. }
  242. return nil
  243. })
  244. }
  245. if err := s.Wait(); err != nil {
  246. return d.findings, err
  247. }
  248. log.Debug().Msgf("%d commits scanned. Note: this number might be smaller than expected due to commits with no additions", len(d.commitMap))
  249. return d.findings, nil
  250. }
  251. // DetectFiles accepts a path to a source directory or file and begins a scan of the
  252. // file or directory.
  253. func (d *Detector) DetectFiles(source string) ([]report.Finding, error) {
  254. s := semgroup.NewGroup(context.Background(), 4)
  255. paths := make(chan string)
  256. s.Go(func() error {
  257. defer close(paths)
  258. return filepath.Walk(source,
  259. func(path string, fInfo os.FileInfo, err error) error {
  260. if err != nil {
  261. return err
  262. }
  263. if fInfo.Name() == ".git" {
  264. return filepath.SkipDir
  265. }
  266. if fInfo.Mode().IsRegular() {
  267. paths <- path
  268. }
  269. return nil
  270. })
  271. })
  272. for pa := range paths {
  273. p := pa
  274. s.Go(func() error {
  275. b, err := os.ReadFile(p)
  276. if err != nil {
  277. return err
  278. }
  279. fragment := Fragment{
  280. Raw: string(b),
  281. FilePath: p,
  282. }
  283. for _, finding := range d.Detect(fragment) {
  284. // need to add 1 since line counting starts at 1
  285. finding.EndLine++
  286. finding.StartLine++
  287. d.addFinding(finding)
  288. }
  289. return nil
  290. })
  291. }
  292. if err := s.Wait(); err != nil {
  293. return d.findings, err
  294. }
  295. return d.findings, nil
  296. }
  297. // Detect scans the given fragment and returns a list of findings
  298. func (d *Detector) Detect(fragment Fragment) []report.Finding {
  299. var findings []report.Finding
  300. // check if filepath is allowed
  301. if d.Config.Allowlist.PathAllowed(fragment.FilePath) ||
  302. fragment.FilePath == d.Config.Path {
  303. return findings
  304. }
  305. // add newline indices for location calculation in detectRule
  306. fragment.newlineIndices = regexp.MustCompile("\n").FindAllStringIndex(fragment.Raw, -1)
  307. for _, rule := range d.Config.Rules {
  308. findings = append(findings, d.detectRule(fragment, rule)...)
  309. }
  310. return filter(findings, d.Redact)
  311. }
  312. // addFinding synchronously adds a finding to the findings slice
  313. func (d *Detector) addFinding(finding report.Finding) {
  314. d.findingMutex.Lock()
  315. d.findings = append(d.findings, finding)
  316. if d.Verbose {
  317. printFinding(finding)
  318. }
  319. d.findingMutex.Unlock()
  320. }
  321. // addCommit synchronously adds a commit to the commit slice
  322. func (d *Detector) addCommit(commit string) {
  323. d.commitMap[commit] = true
  324. }