| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143 |
- package config
- import (
- "math"
- "path/filepath"
- "regexp"
- )
- // Rule is a struct that contains information that is loaded from a gitleaks config.
- // This struct is used in the Config struct as an array of Rules and is iterated
- // over during an scan. Each rule will be checked. If a regex match is found AND
- // that match is not allowlisted (globally or locally), then a leak will be appended
- // to the final scan report.
- type Rule struct {
- Description string
- Regex *regexp.Regexp
- File *regexp.Regexp
- Path *regexp.Regexp
- ReportGroup int
- Tags []string
- AllowList AllowList
- Entropies []Entropy
- }
- // Inspect checks the content of a line for a leak
- func (r *Rule) Inspect(line string) string {
- offender := r.Regex.FindString(line)
- if offender == "" {
- return ""
- }
- // check if offender is allowed
- if r.RegexAllowed(line) {
- return ""
- }
- // check entropy
- groups := r.Regex.FindStringSubmatch(offender)
- if len(r.Entropies) != 0 && !r.ContainsEntropyLeak(groups) {
- return ""
- }
- // 0 is a match for the full regex pattern
- if 0 < r.ReportGroup && r.ReportGroup < len(groups) {
- offender = groups[r.ReportGroup]
- }
- return offender
- }
- // RegexAllowed checks if the content is allowlisted
- func (r *Rule) RegexAllowed(content string) bool {
- return anyRegexMatch(content, r.AllowList.Regexes)
- }
- // CommitAllowed checks if a commit is allowlisted
- func (r *Rule) CommitAllowed(commit string) bool {
- return r.AllowList.CommitAllowed(commit)
- }
- // ContainsEntropyLeak checks if there is an entropy leak
- func (r *Rule) ContainsEntropyLeak(groups []string) bool {
- for _, e := range r.Entropies {
- if len(groups) > e.Group {
- entropy := shannonEntropy(groups[e.Group])
- if entropy >= e.Min && entropy <= e.Max {
- return true
- }
- }
- }
- return false
- }
- // HasFileOrPathLeakOnly first checks if there are no entropy/regex rules, then checks if
- // there are any file/path leaks
- func (r *Rule) HasFileOrPathLeakOnly(filePath string) bool {
- if r.Regex.String() != "" {
- return false
- }
- if len(r.Entropies) != 0 {
- return false
- }
- if r.AllowList.FileAllowed(filepath.Base(filePath)) || r.AllowList.PathAllowed(filePath) {
- return false
- }
- return r.HasFileLeak(filepath.Base(filePath)) || r.HasFilePathLeak(filePath)
- }
- // HasFileLeak checks if there is a file leak
- func (r *Rule) HasFileLeak(fileName string) bool {
- return regexMatched(fileName, r.File)
- }
- // HasFilePathLeak checks if there is a path leak
- func (r *Rule) HasFilePathLeak(filePath string) bool {
- return regexMatched(filePath, r.Path)
- }
// shannonEntropy calculates the entropy of data using the formula defined here:
// https://en.wiktionary.org/wiki/Shannon_entropy
// Another way to think about it is as the average number of bits needed to
// encode each character of data: the higher the entropy, the more random the
// data and the more bits are needed to encode it.
//
// Characters are counted as runes, and each character's frequency is its count
// divided by the total number of runes, so multi-byte UTF-8 input is weighted
// the same way as ASCII input. The empty string has entropy 0.
func shannonEntropy(data string) (entropy float64) {
	if data == "" {
		return 0
	}

	charCounts := make(map[rune]int)
	total := 0
	for _, char := range data {
		charCounts[char]++
		total++
	}

	// Normalise by the number of runes counted, not len(data): len is in
	// bytes, which would make the frequencies sum to less than 1 for any
	// input containing multi-byte characters.
	length := float64(total)
	for _, count := range charCounts {
		freq := float64(count) / length
		entropy -= freq * math.Log2(freq)
	}
	return entropy
}
// regexMatched reports whether re finds a non-empty match in f. A nil re never
// matches, and a pattern that only matches the empty string is treated as not
// matching.
func regexMatched(f string, re *regexp.Regexp) bool {
	return re != nil && re.FindString(f) != ""
}
- // anyRegexMatch matched an interface to a regular expression. The interface f can
- // be a string type or go-git *object.File type.
- func anyRegexMatch(f string, res []*regexp.Regexp) bool {
- for _, re := range res {
- if regexMatched(f, re) {
- return true
- }
- }
- return false
- }
|