瀏覽代碼

Bugs v7 (#475)

* fixing some v7 bugs
Zachary Rice 5 年之前
父節點
當前提交
7db08bfd90

+ 56 - 0
config/allowlist.go

@@ -0,0 +1,56 @@
+package config
+
+import (
+	"regexp"
+)
+
+// AllowList is a struct containing items that, if encountered, will allowlist
+// a commit/line of code that would otherwise be considered a leak.
+type AllowList struct {
+	Description string
+	Regexes     []*regexp.Regexp // checked by RegexAllowed
+	Commits     []string         // compared for exact equality by CommitAllowed
+	Files       []*regexp.Regexp // checked by FileAllowed
+	Paths       []*regexp.Regexp // checked by PathAllowed; IgnoreDotGit appends to this list
+	Repos       []*regexp.Regexp // checked by RepoAllowed
+}
+
+// CommitAllowed reports whether the given commit hash is exactly equal to one
+// of the hashes listed in the allowlist.
+func (a *AllowList) CommitAllowed(commit string) bool {
+	for i := range a.Commits {
+		if a.Commits[i] == commit {
+			return true
+		}
+	}
+	return false
+}
+
+// FileAllowed reports whether fileName is matched by at least one regex in a.Files
+func (a *AllowList) FileAllowed(fileName string) bool {
+	return anyRegexMatch(fileName, a.Files)
+}
+
+// PathAllowed reports whether filePath is matched by at least one regex in a.Paths
+func (a *AllowList) PathAllowed(filePath string) bool {
+	return anyRegexMatch(filePath, a.Paths)
+}
+
+// RegexAllowed reports whether content is matched by at least one regex in a.Regexes
+func (a *AllowList) RegexAllowed(content string) bool {
+	return anyRegexMatch(content, a.Regexes)
+}
+
+// RepoAllowed reports whether repo is matched by at least one regex in a.Repos
+func (a *AllowList) RepoAllowed(repo string) bool {
+	return anyRegexMatch(repo, a.Repos)
+}
+
+// IgnoreDotGit appends a `\.git$` rule to ignore paths ending in `.git`. This is used for --no-git scans
+func (a *AllowList) IgnoreDotGit() error {
+	re, err := regexp.Compile(`\.git$`) // dot is escaped so only a literal ".git" suffix is allowlisted (not e.g. "legit")
+	if err != nil {
+		return err
+	}
+	a.Paths = append(a.Paths, re)
+	return nil
+}

+ 5 - 32
config/config.go

@@ -13,15 +13,11 @@ import (
 	log "github.com/sirupsen/logrus"
 )
 
-// AllowList is struct containing items that if encountered will allowlist
-// a commit/line of code that would be considered a leak.
-type AllowList struct {
-	Description string
-	Regexes     []*regexp.Regexp
-	Commits     []string
-	Files       []*regexp.Regexp
-	Paths       []*regexp.Regexp
-	Repos       []*regexp.Regexp
+// Config is a composite struct of Rules and Allowlists
+// Each Rule contains a description, regular expression, tags, and allowlists if available
+type Config struct {
+	Rules     []Rule
+	Allowlist AllowList
 }
 
 // Entropy represents an entropy range
@@ -31,29 +27,6 @@ type Entropy struct {
 	Group int
 }
 
-// Rule is a struct that contains information that is loaded from a gitleaks config.
-// This struct is used in the Config struct as an array of Rules and is iterated
-// over during an scan. Each rule will be checked. If a regex match is found AND
-// that match is not allowlisted (globally or locally), then a leak will be appended
-// to the final scan report.
-type Rule struct {
-	Description string
-	Regex       *regexp.Regexp
-	File        *regexp.Regexp
-	Path        *regexp.Regexp
-	ReportGroup int
-	Tags        []string
-	AllowList   AllowList
-	Entropies   []Entropy
-}
-
-// Config is a composite struct of Rules and Allowlists
-// Each Rule contains a description, regular expression, tags, and allowlists if available
-type Config struct {
-	Rules     []Rule
-	Allowlist AllowList
-}
-
 // TomlAllowList is a struct used in the TomlLoader that loads in allowlists from
 // specific rules or globally at the top level config
 type TomlAllowList struct {

+ 140 - 0
config/rule.go

@@ -0,0 +1,140 @@
+package config
+
+import (
+	"math"
+	"path/filepath"
+	"regexp"
+)
+
+// Rule is a struct that contains information that is loaded from a gitleaks config.
+// This struct is used in the Config struct as an array of Rules and is iterated
+// over during a scan. Each rule will be checked. If a regex match is found AND
+// that match is not allowlisted (globally or locally), then a leak will be appended
+// to the final scan report.
+type Rule struct {
+	Description string
+	Regex       *regexp.Regexp // content pattern applied to each line by Inspect
+	File        *regexp.Regexp // file name pattern checked by HasFileLeak
+	Path        *regexp.Regexp // path pattern checked by HasFilePathLeak
+	ReportGroup int            // capture group Inspect reports as the offender; 0 means the whole match
+	Tags        []string
+	AllowList   AllowList // rule-level allowlist consulted by RegexAllowed and CommitAllowed
+	Entropies   []Entropy // entropy ranges checked by ContainsEntropyLeak
+}
+
+// Inspect checks the content of a line for a leak. It returns the offending
+// string, or "" when the line does not match the rule's regex, the line is
+// allowlisted by the rule, or the rule's entropy requirements are not met.
+func (r *Rule) Inspect(line string) string {
+	offender := r.Regex.FindString(line)
+	if offender == "" {
+		return ""
+	}
+
+	// check if the line is allowlisted by this rule's allowlist regexes
+	if r.RegexAllowed(line) {
+		return ""
+	}
+
+	// check entropy; groups are the submatches of the regex re-applied to the offender
+	groups := r.Regex.FindStringSubmatch(offender)
+	if len(r.Entropies) != 0 && !r.ContainsEntropyLeak(groups) {
+		return ""
+	}
+
+	// 0 is a match for the full regex pattern, so only a positive, in-range
+	// ReportGroup narrows the offender down to that capture group
+	if 0 < r.ReportGroup && r.ReportGroup < len(groups) {
+		offender = groups[r.ReportGroup]
+	}
+	return offender
+}
+
+// RegexAllowed reports whether content is matched by any regex in the rule's allowlist
+func (r *Rule) RegexAllowed(content string) bool {
+	return r.AllowList.RegexAllowed(content)
+}
+
+// CommitAllowed reports whether commit is listed in the rule's own allowlist
+func (r *Rule) CommitAllowed(commit string) bool {
+	return r.AllowList.CommitAllowed(commit)
+}
+
+// ContainsEntropyLeak reports whether, for at least one of the rule's entropy
+// ranges, the Shannon entropy of the referenced regex group lies within
+// [Min, Max]. Ranges whose group index is not present in groups are skipped.
+func (r *Rule) ContainsEntropyLeak(groups []string) bool {
+	for _, e := range r.Entropies {
+		if e.Group >= len(groups) {
+			continue
+		}
+		if h := shannonEntropy(groups[e.Group]); h >= e.Min && h <= e.Max {
+			return true
+		}
+	}
+	return false
+}
+
+// HasFileOrPathLeakOnly returns false for any rule that has a non-empty content
+// regex or entropy ranges. For the remaining file/path-only rules it reports
+// whether the base name of filePath matches File or filePath itself matches Path.
+func (r *Rule) HasFileOrPathLeakOnly(filePath string) bool {
+	if r.Regex.String() != "" || len(r.Entropies) != 0 {
+		return false
+	}
+	baseName := filepath.Base(filePath)
+	return r.HasFileLeak(baseName) || r.HasFilePathLeak(filePath)
+}
+
+// HasFileLeak reports whether fileName is matched by the rule's File regex
+func (r *Rule) HasFileLeak(fileName string) bool {
+	return regexMatched(fileName, r.File)
+}
+
+// HasFilePathLeak reports whether filePath is matched by the rule's Path regex
+func (r *Rule) HasFilePathLeak(filePath string) bool {
+	return regexMatched(filePath, r.Path)
+}
+
+// shannonEntropy calculates the entropy of data using the formula defined here:
+// https://en.wiktionary.org/wiki/Shannon_entropy
+// Another way to think about this is as the number of bits needed, on average, to encode
+// each character of the data: the higher the entropy, the more random the data.
+// Frequencies are computed per rune, so multi-byte UTF-8 characters are weighted correctly.
+func shannonEntropy(data string) (entropy float64) {
+	if data == "" {
+		return 0
+	}
+
+	charCounts := make(map[rune]int)
+	for _, char := range data {
+		charCounts[char]++
+	}
+
+	invLength := 1.0 / float64(len([]rune(data))) // rune count, matching the per-rune counts above
+	for _, count := range charCounts {
+		freq := float64(count) * invLength
+		entropy -= freq * math.Log2(freq)
+	}
+
+	return entropy
+}
+
+// regexMatched reports whether re finds a non-empty match in the string f.
+// A nil regular expression never matches.
+func regexMatched(f string, re *regexp.Regexp) bool {
+	return re != nil && re.FindString(f) != ""
+}
+
+// anyRegexMatch reports whether the string f is matched by at least one of the
+// regular expressions in res, as decided by regexMatched.
+func anyRegexMatch(f string, res []*regexp.Regexp) bool {
+	for i := range res {
+		if regexMatched(f, res[i]) {
+			return true
+		}
+	}
+	return false
+}

+ 1 - 3
main.go

@@ -5,8 +5,6 @@ import (
 	"os/signal"
 	"time"
 
-	"github.com/zricethezav/gitleaks/v7/report"
-
 	"github.com/zricethezav/gitleaks/v7/config"
 	"github.com/zricethezav/gitleaks/v7/options"
 	"github.com/zricethezav/gitleaks/v7/scan"
@@ -59,7 +57,7 @@ func main() {
 	}
 
 	// report scan
-	if err := report.WriteReport(scannerReport, opts, cfg); err != nil {
+	if err := scan.WriteReport(scannerReport, opts, cfg); err != nil {
 		log.Error(err)
 		os.Exit(1)
 	}

+ 0 - 33
report/leak.go

@@ -1,33 +0,0 @@
-package report
-
-import (
-	"strings"
-	"time"
-)
-
-// Leak is a struct that contains information about some line of code that contains
-// sensitive information as determined by the rules set in a gitleaks config
-type Leak struct {
-	Line       string    `json:"line"`
-	LineNumber int       `json:"lineNumber"`
-	Offender   string    `json:"offender"`
-	Commit     string    `json:"commit"`
-	Repo       string    `json:"repo"`
-	RepoURL    string    `json:"repoURL"`
-	LeakURL    string    `json:"leakURL"`
-	Rule       string    `json:"rule"`
-	Message    string    `json:"commitMessage"`
-	Author     string    `json:"author"`
-	Email      string    `json:"email"`
-	File       string    `json:"file"`
-	Date       time.Time `json:"date"`
-	Tags       string    `json:"tags"`
-}
-
-// RedactLeak will replace the offending string with "REDACTED" in both
-// the offender and line field of the leak which.
-func RedactLeak(leak Leak) Leak {
-	leak.Line = strings.Replace(leak.Line, leak.Offender, "REDACTED", -1)
-	leak.Offender = "REDACTED"
-	return leak
-}

+ 95 - 33
scan/commit.go

@@ -2,8 +2,8 @@ package scan
 
 import (
 	"fmt"
-
-	"github.com/zricethezav/gitleaks/v7/report"
+	"path/filepath"
+	"strings"
 
 	"github.com/go-git/go-git/v5"
 	fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
@@ -16,6 +16,7 @@ type CommitScanner struct {
 	repo     *git.Repository
 	repoName string
 	commit   *object.Commit
+	patch    *object.Patch
 }
 
 // NewCommitScanner creates and returns a commit scanner
@@ -30,57 +31,118 @@ func NewCommitScanner(base BaseScanner, repo *git.Repository, commit *object.Com
 	return cs
 }
 
+// SetRepoName sets the repo name of the scanner.
+func (cs *CommitScanner) SetRepoName(repoName string) {
+	cs.repoName = repoName
+}
+
+// SetPatch sets the patch to be inspected by the commit scanner. This is used to avoid
+// a race condition when running a threaded repo scan
+func (cs *CommitScanner) SetPatch(patch *object.Patch) {
+	cs.patch = patch
+}
+
 // Scan kicks off a CommitScanner Scan
-func (cs *CommitScanner) Scan() (report.Report, error) {
-	var scannerReport report.Report
+func (cs *CommitScanner) Scan() (Report, error) {
+	var scannerReport Report
 	if len(cs.commit.ParentHashes) == 0 {
 		facScanner := NewFilesAtCommitScanner(cs.BaseScanner, cs.repo, cs.commit)
 		return facScanner.Scan()
 	}
 
-	err := cs.commit.Parents().ForEach(func(parent *object.Commit) error {
-		defer func() {
-			if err := recover(); err != nil {
-				// sometimes the Patch generation will fail due to a known bug in
-				// sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
-				// Once a fix has been merged I will remove this recover.
-				return
-			}
-		}()
+	if cs.patch == nil {
+		parent, err := cs.commit.Parent(0)
+		if err != nil {
+			return scannerReport, err
+		}
+
 		if parent == nil {
-			return nil
+			return scannerReport, nil
 		}
 
-		patch, err := parent.Patch(cs.commit)
+		cs.patch, err = parent.Patch(cs.commit)
 		if err != nil {
-			return fmt.Errorf("could not generate Patch")
+			return scannerReport, fmt.Errorf("could not generate Patch")
 		}
+	}
 
-		patchContent := patch.String()
+	patchContent := cs.patch.String()
 
-		for _, f := range patch.FilePatches() {
-			if f.IsBinary() {
-				continue
-			}
-			for _, chunk := range f.Chunks() {
-				if chunk.Type() == fdiff.Add {
-					_, to := f.Files()
-					leaks := checkRules(cs.BaseScanner, cs.commit, cs.repoName, to.Path(), chunk.Content())
+	for _, f := range cs.patch.FilePatches() {
+		if f.IsBinary() {
+			continue
+		}
+		for _, chunk := range f.Chunks() {
+			if chunk.Type() == fdiff.Add {
+				_, to := f.Files()
+				if cs.cfg.Allowlist.FileAllowed(filepath.Base(to.Path())) ||
+					cs.cfg.Allowlist.PathAllowed(to.Path()) {
+					continue
+				}
 
-					lineLookup := make(map[string]bool)
-					for _, leak := range leaks {
-						leak.LineNumber = extractLine(patchContent, leak, lineLookup)
-						leak.LeakURL = leakURL(leak)
+				// Check individual file path ONLY rules
+				for _, rule := range cs.cfg.Rules {
+					if rule.CommitAllowed(cs.commit.Hash.String()) {
+						continue
+					}
+
+					if rule.HasFileOrPathLeakOnly(to.Path()) {
+						leak := NewLeak("", "Filename or path offender: "+to.Path(), defaultLineNumber).WithCommit(cs.commit)
+						leak.Repo = cs.repoName
+						leak.File = to.Path()
+						leak.RepoURL = cs.opts.RepoURL
+						leak.LeakURL = leak.URL()
+						leak.Rule = rule.Description
+						leak.Tags = strings.Join(rule.Tags, ", ")
+
+						if cs.opts.Verbose {
+							leak.Log(cs.opts.Redact)
+						}
 						scannerReport.Leaks = append(scannerReport.Leaks, leak)
+						continue
+					}
+				}
+
+				lineLookup := make(map[string]bool)
+
+				// Check the actual content
+				for _, line := range strings.Split(chunk.Content(), "\n") {
+					for _, rule := range cs.cfg.Rules {
+						offender := rule.Inspect(line)
+						if offender == "" {
+							continue
+						}
+						if cs.cfg.Allowlist.RegexAllowed(line) ||
+							rule.AllowList.FileAllowed(filepath.Base(to.Path())) ||
+							rule.AllowList.PathAllowed(to.Path()) ||
+							rule.AllowList.CommitAllowed(cs.commit.Hash.String()) {
+							continue
+						}
+
+						if rule.File.String() != "" && !rule.HasFileLeak(filepath.Base(to.Path())) {
+							continue
+						}
+						if rule.Path.String() != "" && !rule.HasFilePathLeak(to.Path()) {
+							continue
+						}
+
+						leak := NewLeak(line, offender, defaultLineNumber).WithCommit(cs.commit)
+						leak.File = to.Path()
+						leak.LineNumber = extractLine(patchContent, leak, lineLookup)
+						leak.RepoURL = cs.opts.RepoURL
+						leak.Repo = cs.repoName
+						leak.LeakURL = leak.URL()
+						leak.Rule = rule.Description
+						leak.Tags = strings.Join(rule.Tags, ", ")
 						if cs.opts.Verbose {
-							logLeak(leak, cs.opts.Redact)
+							leak.Log(cs.opts.Redact)
 						}
+						scannerReport.Leaks = append(scannerReport.Leaks, leak)
 					}
 				}
 			}
 		}
-		return nil
-	})
+	}
 	scannerReport.Commits = 1
-	return scannerReport, err
+	return scannerReport, nil
 }

+ 2 - 3
scan/commits.go

@@ -2,7 +2,6 @@ package scan
 
 import (
 	"github.com/go-git/go-git/v5"
-	"github.com/zricethezav/gitleaks/v7/report"
 )
 
 // CommitsScanner is a commit scanner
@@ -25,8 +24,8 @@ func NewCommitsScanner(base BaseScanner, repo *git.Repository, commits []string)
 }
 
 // Scan kicks off a CommitsScanner Scan
-func (css *CommitsScanner) Scan() (report.Report, error) {
-	var scannerReport report.Report
+func (css *CommitsScanner) Scan() (Report, error) {
+	var scannerReport Report
 	for _, c := range css.commits {
 		c, err := obtainCommit(css.repo, c)
 		if err != nil {

+ 64 - 4
scan/filesatcommit.go

@@ -1,9 +1,11 @@
 package scan
 
 import (
+	"path/filepath"
+	"strings"
+
 	"github.com/go-git/go-git/v5"
 	"github.com/go-git/go-git/v5/plumbing/object"
-	"github.com/zricethezav/gitleaks/v7/report"
 )
 
 // FilesAtCommitScanner is a files at commit scanner. This differs from CommitScanner
@@ -31,8 +33,8 @@ func NewFilesAtCommitScanner(base BaseScanner, repo *git.Repository, commit *obj
 }
 
 // Scan kicks off a FilesAtCommitScanner Scan
-func (fs *FilesAtCommitScanner) Scan() (report.Report, error) {
-	var scannerReport report.Report
+func (fs *FilesAtCommitScanner) Scan() (Report, error) {
+	var scannerReport Report
 	fIter, err := fs.commit.Files()
 	if err != nil {
 		return scannerReport, err
@@ -51,7 +53,65 @@ func (fs *FilesAtCommitScanner) Scan() (report.Report, error) {
 			return err
 		}
 
-		scannerReport.Leaks = append(scannerReport.Leaks, checkRules(fs.BaseScanner, fs.commit, fs.repoName, f.Name, content)...)
+		// Check individual file path ONLY rules
+		for _, rule := range fs.cfg.Rules {
+			if rule.CommitAllowed(fs.commit.Hash.String()) {
+				continue
+			}
+
+			if rule.HasFileOrPathLeakOnly(f.Name) {
+				leak := NewLeak("", "Filename or path offender: "+f.Name, defaultLineNumber).WithCommit(fs.commit)
+				leak.Repo = fs.repoName
+				leak.File = f.Name
+				leak.RepoURL = fs.opts.RepoURL
+				leak.LeakURL = leak.URL()
+				leak.Rule = rule.Description
+				leak.Tags = strings.Join(rule.Tags, ", ")
+
+				if fs.opts.Verbose {
+					leak.Log(fs.opts.Redact)
+				}
+				scannerReport.Leaks = append(scannerReport.Leaks, leak)
+				continue
+			}
+		}
+
+		for i, line := range strings.Split(content, "\n") {
+			for _, rule := range fs.cfg.Rules {
+				offender := rule.Inspect(line)
+
+				if offender == "" {
+					continue
+				}
+				if fs.cfg.Allowlist.RegexAllowed(line) ||
+					rule.AllowList.FileAllowed(filepath.Base(f.Name)) ||
+					rule.AllowList.PathAllowed(f.Name) ||
+					rule.AllowList.CommitAllowed(fs.commit.Hash.String()) {
+					continue
+				}
+
+				if rule.File.String() != "" && !rule.HasFileLeak(filepath.Base(f.Name)) {
+					continue
+				}
+				if rule.Path.String() != "" && !rule.HasFilePathLeak(f.Name) {
+					continue
+				}
+
+				leak := NewLeak(line, offender, defaultLineNumber).WithCommit(fs.commit)
+				leak.File = f.Name
+				leak.LineNumber = i + 1
+				leak.RepoURL = fs.opts.RepoURL
+				leak.Repo = fs.repoName
+				leak.LeakURL = leak.URL()
+				leak.Rule = rule.Description
+				leak.Tags = strings.Join(rule.Tags, ", ")
+				if fs.opts.Verbose {
+					leak.Log(fs.opts.Redact)
+				}
+				scannerReport.Leaks = append(scannerReport.Leaks, leak)
+			}
+		}
+
 		return nil
 	})
 

+ 74 - 0
scan/leak.go

@@ -0,0 +1,74 @@
+package scan
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/go-git/go-git/v5/plumbing/object"
+)
+
+// Leak is a struct that contains information about some line of code that contains
+// sensitive information as determined by the rules set in a gitleaks config.
+// The struct tags give the field names used when a leak is encoded as JSON.
+type Leak struct {
+	Line       string    `json:"line"`
+	LineNumber int       `json:"lineNumber"`
+	Offender   string    `json:"offender"`
+	Commit     string    `json:"commit"`
+	Repo       string    `json:"repo"`
+	RepoURL    string    `json:"repoURL"`
+	LeakURL    string    `json:"leakURL"`
+	Rule       string    `json:"rule"`
+	Message    string    `json:"commitMessage"`
+	Author     string    `json:"author"`
+	Email      string    `json:"email"`
+	File       string    `json:"file"`
+	Date       time.Time `json:"date"`
+	Tags       string    `json:"tags"`
+}
+
+// RedactLeak returns a copy of the leak in which every occurrence of the
+// offending string in the Line field, and the Offender field itself, is
+// replaced with "REDACTED".
+func RedactLeak(leak Leak) Leak {
+	redactedLine := strings.ReplaceAll(leak.Line, leak.Offender, "REDACTED")
+	leak.Offender = "REDACTED"
+	leak.Line = redactedLine
+	return leak
+}
+
+// NewLeak builds a Leak from the data every leak must carry: the line, the
+// offender and the line number. All other fields are left at their zero value.
+func NewLeak(line string, offender string, lineNumber int) Leak {
+	var leak Leak
+	leak.Line = line
+	leak.Offender = offender
+	leak.LineNumber = lineNumber
+	return leak
+}
+
+// WithCommit returns a copy of the leak with the commit hash, author name,
+// author email, commit message and author date filled in from commit. The
+// receiver is passed by value, so the leak it is called on is not modified.
+func (leak Leak) WithCommit(commit *object.Commit) Leak {
+	leak.Commit = commit.Hash.String()
+	leak.Author = commit.Author.Name
+	leak.Email = commit.Author.Email
+	leak.Message = commit.Message
+	leak.Date = commit.Author.When
+	return leak
+}
+
+// Log prints the leak to standard output as tab-indented JSON. When redact is
+// true the leak is first passed through RedactLeak; because the receiver is a
+// value, the caller's leak is left unredacted.
+func (leak Leak) Log(redact bool) {
+	if redact {
+		leak = RedactLeak(leak)
+	}
+	var b []byte
+	// NOTE(review): the marshal error is discarded, so a failure would print an empty line — confirm this is intended
+	b, _ = json.MarshalIndent(leak, "", "	")
+	fmt.Println(string(b))
+}
+
+// URL builds a link of the form <RepoURL>/blob/<Commit>/<File>#L<LineNumber>
+// for the leak. It returns "" when leak.RepoURL is not set.
+func (leak Leak) URL() string {
+	if leak.RepoURL == "" {
+		return ""
+	}
+	return fmt.Sprintf("%s/blob/%s/%s#L%d", leak.RepoURL, leak.Commit, leak.File, leak.LineNumber)
+}

+ 71 - 37
scan/nogit.go

@@ -5,9 +5,9 @@ import (
 	"context"
 	"os"
 	"path/filepath"
-	"sync"
+	"strings"
 
-	"github.com/zricethezav/gitleaks/v7/report"
+	log "github.com/sirupsen/logrus"
 
 	"golang.org/x/sync/errgroup"
 )
@@ -15,34 +15,34 @@ import (
 // NoGitScanner is a scanner that absolutely despises git
 type NoGitScanner struct {
 	BaseScanner
-	leakChan chan report.Leak
-	leakWG   *sync.WaitGroup
-	leaks    []report.Leak
 }
 
 // NewNoGitScanner creates and returns a nogit scanner. This is used for scanning files and directories
 func NewNoGitScanner(base BaseScanner) *NoGitScanner {
 	ngs := &NoGitScanner{
 		BaseScanner: base,
-		leakChan:    make(chan report.Leak),
-		leakWG:      &sync.WaitGroup{},
 	}
 
-	go ngs.receiveLeaks()
-
 	ngs.scannerType = typeNoGitScanner
 
+	// no-git scans should ignore .git folders by default
+	// issue: https://github.com/zricethezav/gitleaks/issues/474
+	// ngs.cfg.Allowlist
+	err := ngs.cfg.Allowlist.IgnoreDotGit()
+	if err != nil {
+		log.Error(err)
+		return nil
+	}
+
 	return ngs
 }
 
 // Scan kicks off a NoGitScanner Scan
-func (ngs *NoGitScanner) Scan() (report.Report, error) {
-	var scannerReport report.Report
+func (ngs *NoGitScanner) Scan() (Report, error) {
+	var scannerReport Report
 
 	g, _ := errgroup.WithContext(context.Background())
-	paths := make(chan string)
-	semaphore := make(chan bool, howManyThreads(ngs.opts.Threads))
-	wg := sync.WaitGroup{}
+	paths := make(chan string, 100)
 
 	g.Go(func() error {
 		defer close(paths)
@@ -58,46 +58,80 @@ func (ngs *NoGitScanner) Scan() (report.Report, error) {
 			})
 	})
 
+	leaks := make(chan Leak, 100)
+
 	for path := range paths {
 		p := path
-		wg.Add(1)
-		semaphore <- true
 		g.Go(func() error {
-			defer func() {
-				<-semaphore
-				wg.Done()
-			}()
+			if ngs.cfg.Allowlist.FileAllowed(filepath.Base(p)) ||
+				ngs.cfg.Allowlist.PathAllowed(p) {
+				return nil
+			}
+
+			for _, rule := range ngs.cfg.Rules {
+				if rule.HasFileOrPathLeakOnly(p) {
+					leak := NewLeak("", "Filename or path offender: "+p, defaultLineNumber)
+					leak.File = p
+					leak.Rule = rule.Description
+					leak.Tags = strings.Join(rule.Tags, ", ")
+
+					if ngs.opts.Verbose {
+						leak.Log(ngs.opts.Redact)
+					}
+					leaks <- leak
+				}
+			}
+
 			f, err := os.Open(p)
 			if err != nil {
 				return err
 			}
 			scanner := bufio.NewScanner(f)
-			line := 0
+			lineNumber := 0
 			for scanner.Scan() {
-				line++
-				leaks := checkRules(ngs.BaseScanner, emptyCommit(), "", f.Name(), scanner.Text())
-				for _, leak := range leaks {
-					leak.LineNumber = line
+				lineNumber++
+				for _, rule := range ngs.cfg.Rules {
+					line := scanner.Text()
+					offender := rule.Inspect(line)
+					if offender == "" {
+						continue
+					}
+					if ngs.cfg.Allowlist.RegexAllowed(line) ||
+						rule.AllowList.FileAllowed(filepath.Base(p)) ||
+						rule.AllowList.PathAllowed(p) {
+						continue
+					}
+
+					if rule.File.String() != "" && !rule.HasFileLeak(filepath.Base(p)) {
+						continue
+					}
+					if rule.Path.String() != "" && !rule.HasFilePathLeak(p) {
+						continue
+					}
+
+					leak := NewLeak(line, offender, defaultLineNumber)
+					leak.File = p
+					leak.LineNumber = lineNumber
+					leak.Rule = rule.Description
+					leak.Tags = strings.Join(rule.Tags, ", ")
 					if ngs.opts.Verbose {
-						logLeak(leak, ngs.opts.Redact)
+						leak.Log(ngs.opts.Redact)
 					}
-					ngs.leakWG.Add(1)
-					ngs.leakChan <- leak
+					leaks <- leak
 				}
 			}
 			return f.Close()
 		})
 	}
-	wg.Wait()
-	ngs.leakWG.Wait()
-	scannerReport.Leaks = ngs.leaks
 
-	return scannerReport, nil
-}
+	go func() {
+		g.Wait()
+		close(leaks)
+	}()
 
-func (ngs *NoGitScanner) receiveLeaks() {
-	for leak := range ngs.leakChan {
-		ngs.leaks = append(ngs.leaks, leak)
-		ngs.leakWG.Done()
+	for leak := range leaks {
+		scannerReport.Leaks = append(scannerReport.Leaks, leak)
 	}
+
+	return scannerReport, g.Wait()
 }

+ 13 - 10
scan/parent.go

@@ -4,7 +4,7 @@ import (
 	"io/ioutil"
 	"path/filepath"
 
-	"github.com/zricethezav/gitleaks/v7/report"
+	"github.com/zricethezav/gitleaks/v7/config"
 
 	"github.com/go-git/go-git/v5"
 	log "github.com/sirupsen/logrus"
@@ -25,8 +25,8 @@ func NewParentScanner(base BaseScanner) *ParentScanner {
 }
 
 // Scan kicks off a ParentScanner scan. This uses the directory from --path to discover repos
-func (ds *ParentScanner) Scan() (report.Report, error) {
-	var scannerReport report.Report
+func (ds *ParentScanner) Scan() (Report, error) {
+	var scannerReport Report
 	log.Debugf("scanning repos in %s\n", ds.opts.Path)
 
 	files, err := ioutil.ReadDir(ds.opts.Path)
@@ -46,16 +46,19 @@ func (ds *ParentScanner) Scan() (report.Report, error) {
 			}
 			return scannerReport, err
 		}
-		skip := false
-		for _, allowListedRepo := range ds.cfg.Allowlist.Repos {
-			if regexMatched(f.Name(), allowListedRepo) {
-				skip = true
-			}
-		}
-		if skip {
+		if ds.cfg.Allowlist.RepoAllowed(f.Name()) {
 			continue
 		}
 
+		if ds.opts.RepoConfigPath != "" {
+			cfg, err := config.LoadRepoConfig(repo, ds.opts.RepoConfigPath)
+			if err != nil {
+				log.Warn(err)
+			} else {
+				ds.BaseScanner.cfg = cfg
+			}
+		}
+
 		rs := NewRepoScanner(ds.BaseScanner, repo)
 		rs.repoName = f.Name()
 		repoReport, err := rs.Scan()

+ 20 - 51
scan/repo.go

@@ -1,15 +1,14 @@
 package scan
 
 import (
+	"fmt"
 	"sync"
 
-	"github.com/zricethezav/gitleaks/v7/report"
+	log "github.com/sirupsen/logrus"
 
 	"github.com/go-git/go-git/v5"
-	fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
 	"github.com/go-git/go-git/v5/plumbing/object"
 	"github.com/go-git/go-git/v5/plumbing/storer"
-	log "github.com/sirupsen/logrus"
 )
 
 // RepoScanner is a repo scanner
@@ -18,10 +17,10 @@ type RepoScanner struct {
 	repo     *git.Repository
 	repoName string
 
-	leakChan  chan report.Leak
+	leakChan  chan Leak
 	leakWG    *sync.WaitGroup
 	leakCache map[string]bool
-	leaks     []report.Leak
+	leaks     []Leak
 }
 
 // NewRepoScanner returns a new repo scanner (go figure). This function also
@@ -30,7 +29,7 @@ func NewRepoScanner(base BaseScanner, repo *git.Repository) *RepoScanner {
 	rs := &RepoScanner{
 		BaseScanner: base,
 		repo:        repo,
-		leakChan:    make(chan report.Leak),
+		leakChan:    make(chan Leak),
 		leakWG:      &sync.WaitGroup{},
 		leakCache:   make(map[string]bool),
 		repoName:    getRepoName(base.opts),
@@ -44,8 +43,8 @@ func NewRepoScanner(base BaseScanner, repo *git.Repository) *RepoScanner {
 }
 
 // Scan kicks off a repo scan
-func (rs *RepoScanner) Scan() (report.Report, error) {
-	var scannerReport report.Report
+func (rs *RepoScanner) Scan() (Report, error) {
+	var scannerReport Report
 	logOpts, err := logOptions(rs.repo, rs.opts)
 	if err != nil {
 		return scannerReport, err
@@ -62,8 +61,7 @@ func (rs *RepoScanner) Scan() (report.Report, error) {
 			return storer.ErrStop
 		}
 
-		// Check if Commit is allowlisted
-		if isCommitAllowListed(c.Hash.String(), rs.cfg.Allowlist.Commits) {
+		if rs.cfg.Allowlist.CommitAllowed(c.Hash.String()) {
 			return nil
 		}
 
@@ -84,28 +82,12 @@ func (rs *RepoScanner) Scan() (report.Report, error) {
 		// (they exist as the tip of other branches, etc)
 		// See https://github.com/zricethezav/gitleaks/issues/413 for details
 		parent, err := c.Parent(0)
-		if err != nil {
+		if err != nil || parent == nil {
 			return err
 		}
-
-		defer func() {
-			if err := recover(); err != nil {
-				// sometimes the Patch generation will fail due to a known bug in
-				// sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
-				// Once a fix has been merged I will remove this recover.
-				return
-			}
-		}()
-
-		if parent == nil {
-			// shouldn't reach this point but just in case
-			return nil
-		}
-
-		// start := time.Now()
 		patch, err := parent.Patch(c)
 		if err != nil {
-			log.Errorf("could not generate Patch")
+			return fmt.Errorf("could not generate Patch")
 		}
 
 		scannerReport.Commits++
@@ -117,29 +99,16 @@ func (rs *RepoScanner) Scan() (report.Report, error) {
 				wg.Done()
 			}()
 
-			// patchContent is used for searching for leak line number
-			patchContent := patch.String()
-
-			for _, f := range patch.FilePatches() {
-				if f.IsBinary() {
-					continue
-				}
-
-				for _, chunk := range f.Chunks() {
-					if chunk.Type() == fdiff.Add {
-						_, to := f.Files()
-						lineLookup := make(map[string]bool)
-						for _, leak := range checkRules(rs.BaseScanner, c, rs.repoName, to.Path(), chunk.Content()) {
-							leak.LineNumber = extractLine(patchContent, leak, lineLookup)
-							leak.LeakURL = leakURL(leak)
-							if rs.opts.Verbose {
-								logLeak(leak, rs.opts.Redact)
-							}
-							rs.leakWG.Add(1)
-							rs.leakChan <- leak
-						}
-					}
-				}
+			commitScanner := NewCommitScanner(rs.BaseScanner, rs.repo, c)
+			commitScanner.SetRepoName(rs.repoName)
+			commitScanner.SetPatch(patch)
+			report, err := commitScanner.Scan()
+			if err != nil {
+				log.Error(err)
+			}
+			for _, leak := range report.Leaks {
+				rs.leakWG.Add(1)
+				rs.leakChan <- leak
 			}
 		}(c, patch)
 

+ 5 - 6
report/report.go → scan/report.go

@@ -1,4 +1,4 @@
-package report
+package scan
 
 import (
 	"encoding/csv"
@@ -6,11 +6,10 @@ import (
 	"os"
 	"time"
 
+	"github.com/sirupsen/logrus"
 	"github.com/zricethezav/gitleaks/v7/config"
 	"github.com/zricethezav/gitleaks/v7/options"
 	"github.com/zricethezav/gitleaks/v7/version"
-
-	log "github.com/sirupsen/logrus"
 )
 
 // Report is a container for leaks and number of commits scanned
@@ -22,12 +21,12 @@ type Report struct {
 // WriteReport accepts a report and options and will write a report if --report has been set
 func WriteReport(report Report, opts options.Options, cfg config.Config) error {
 	if !(opts.NoGit || opts.CheckUncommitted()) {
-		log.Info("commits scanned: ", report.Commits)
+		logrus.Info("commits scanned: ", report.Commits)
 	}
 	if len(report.Leaks) != 0 {
-		log.Warn("leaks found: ", len(report.Leaks))
+		logrus.Warn("leaks found: ", len(report.Leaks))
 	} else {
-		log.Info("No leaks found")
+		logrus.Info("No leaks found")
 		return nil
 	}
 

+ 1 - 1
report/sarif.go → scan/sarif.go

@@ -1,4 +1,4 @@
-package report
+package scan
 
 import (
 	"fmt"

+ 3 - 12
scan/scan.go

@@ -8,13 +8,12 @@ import (
 
 	"github.com/zricethezav/gitleaks/v7/config"
 	"github.com/zricethezav/gitleaks/v7/options"
-	"github.com/zricethezav/gitleaks/v7/report"
 )
 
 // Scanner abstracts unique scanner internals while exposing the Scan function which
 // returns a report.
 type Scanner interface {
-	Scan() (report.Report, error)
+	Scan() (Report, error)
 }
 
 // BaseScanner is a container for common data each scanner needs.
@@ -46,15 +45,7 @@ func NewScanner(opts options.Options, cfg config.Config) (Scanner, error) {
 		repo *git.Repository
 		err  error
 	)
-	// TODO move this block to config parsing?
-	for _, allowListedRepo := range cfg.Allowlist.Repos {
-		if regexMatched(opts.Path, allowListedRepo) {
-			return nil, nil
-		}
-		if regexMatched(opts.RepoURL, allowListedRepo) {
-			return nil, nil
-		}
-	}
+
 	base := BaseScanner{
 		opts: opts,
 		cfg:  cfg,
@@ -79,7 +70,7 @@ func NewScanner(opts options.Options, cfg config.Config) (Scanner, error) {
 	}
 
 	// load up alternative config if possible, if not use manager's config
-	if opts.RepoConfigPath != "" {
+	if opts.RepoConfigPath != "" && !opts.NoGit {
 		base.cfg, err = config.LoadRepoConfig(repo, opts.RepoConfigPath)
 		if err != nil {
 			return nil, err

+ 21 - 8
scan/scan_test.go

@@ -10,8 +10,6 @@ import (
 	"sort"
 	"testing"
 
-	"github.com/zricethezav/gitleaks/v7/report"
-
 	"github.com/zricethezav/gitleaks/v7/config"
 	"github.com/zricethezav/gitleaks/v7/options"
 
@@ -419,6 +417,17 @@ func TestScan(t *testing.T) {
 			},
 			wantPath: "../test_data/test_file1_aws_leak.json",
 		},
+		{
+			description: "test only md files no git",
+			opts: options.Options{
+				Path:         "../test_data/test_repos/",
+				Report:       "../test_data/test_only_files_no_git.json.got",
+				ReportFormat: "json",
+				ConfigPath:   "../test_data/test_configs/onlyFiles.toml",
+				NoGit:        true,
+			},
+			wantPath: "../test_data/test_only_files_no_git.json",
+		},
 	}
 
 	for _, test := range tests {
@@ -457,7 +466,7 @@ func TestScan(t *testing.T) {
 			t.Fatal(err)
 		}
 
-		err = report.WriteReport(scannerReport, test.opts, cfg)
+		err = WriteReport(scannerReport, test.opts, cfg)
 		if err != nil {
 			t.Error(err)
 		}
@@ -584,7 +593,7 @@ func TestScanUncommited(t *testing.T) {
 			t.Fatal(err)
 		}
 
-		err = report.WriteReport(scannerReport, test.opts, cfg)
+		err = WriteReport(scannerReport, test.opts, cfg)
 		if err != nil {
 			t.Error(err)
 		}
@@ -611,8 +620,8 @@ func TestScanUncommited(t *testing.T) {
 
 func fileCheck(wantPath, gotPath string) error {
 	var (
-		gotLeaks  []report.Leak
-		wantLeaks []report.Leak
+		gotLeaks  []Leak
+		wantLeaks []Leak
 	)
 	want, err := ioutil.ReadFile(wantPath)
 	if err != nil {
@@ -634,8 +643,12 @@ func fileCheck(wantPath, gotPath string) error {
 		return err
 	}
 
-	sort.Slice(gotLeaks, func(i, j int) bool { return (gotLeaks)[i].Commit < (gotLeaks)[j].Commit })
-	sort.Slice(wantLeaks, func(i, j int) bool { return (wantLeaks)[i].Commit < (wantLeaks)[j].Commit })
+	sort.Slice(gotLeaks, func(i, j int) bool {
+		return (gotLeaks)[i].Offender+(gotLeaks)[i].File < (gotLeaks)[j].Offender+(gotLeaks)[j].File
+	})
+	sort.Slice(wantLeaks, func(i, j int) bool {
+		return (wantLeaks)[i].Offender+(wantLeaks)[i].File < (wantLeaks)[j].Offender+(wantLeaks)[j].File
+	})
 
 	if !reflect.DeepEqual(gotLeaks, wantLeaks) {
 		dmp := diffmatchpatch.New()

+ 60 - 22
scan/unstaged.go

@@ -5,11 +5,10 @@ import (
 	"fmt"
 	"io"
 	"os/exec"
+	"path/filepath"
 	"strings"
 	"time"
 
-	"github.com/zricethezav/gitleaks/v7/report"
-
 	"github.com/go-git/go-git/v5"
 	"github.com/go-git/go-git/v5/plumbing"
 	"github.com/sergi/go-diff/diffmatchpatch"
@@ -35,8 +34,8 @@ func NewUnstagedScanner(base BaseScanner, repo *git.Repository) *UnstagedScanner
 }
 
 // Scan kicks off an unstaged scan. This will attempt to determine unstaged changes which are then scanned.
-func (us *UnstagedScanner) Scan() (report.Report, error) {
-	var scannerReport report.Report
+func (us *UnstagedScanner) Scan() (Report, error) {
+	var scannerReport Report
 	r, err := us.repo.Head()
 	if err == plumbing.ErrReferenceNotFound {
 		wt, err := us.repo.Worktree()
@@ -57,12 +56,36 @@ func (us *UnstagedScanner) Scan() (report.Report, error) {
 			if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
 				return scannerReport, err
 			}
-			leaks := checkRules(us.BaseScanner, emptyCommit(), us.repoName, workTreeFile.Name(), workTreeBuf.String())
-			for _, leak := range leaks {
-				if us.opts.Verbose {
-					logLeak(leak, us.opts.Redact)
+			lineNumber := 0
+			for _, line := range strings.Split(workTreeBuf.String(), "\n") {
+				lineNumber++
+				for _, rule := range us.cfg.Rules {
+					offender := rule.Inspect(line)
+					if offender == "" {
+						continue
+					}
+					if us.cfg.Allowlist.RegexAllowed(line) ||
+						rule.AllowList.FileAllowed(filepath.Base(workTreeFile.Name())) ||
+						rule.AllowList.PathAllowed(workTreeFile.Name()) {
+						continue
+					}
+					if rule.File.String() != "" && !rule.HasFileLeak(filepath.Base(workTreeFile.Name())) {
+						continue
+					}
+					if rule.Path.String() != "" && !rule.HasFilePathLeak(filepath.Base(workTreeFile.Name())) {
+						continue
+					}
+					leak := NewLeak(line, offender, defaultLineNumber).WithCommit(emptyCommit())
+					leak.File = workTreeFile.Name()
+					leak.LineNumber = lineNumber
+					leak.Repo = us.repoName
+					leak.Rule = rule.Description
+					leak.Tags = strings.Join(rule.Tags, ", ")
+					if us.opts.Verbose {
+						leak.Log(us.opts.Redact)
+					}
+					scannerReport.Leaks = append(scannerReport.Leaks, leak)
 				}
-				scannerReport.Leaks = append(scannerReport.Leaks, leak)
 			}
 		}
 		return scannerReport, nil
@@ -145,22 +168,36 @@ func (us *UnstagedScanner) Scan() (report.Report, error) {
 					diffContents += fmt.Sprintf("%s\n", d.Text)
 				}
 			}
-			leaks := checkRules(us.BaseScanner, c, us.repoName, filename, diffContents)
 
 			lineLookup := make(map[string]bool)
-			for _, leak := range leaks {
-				for lineNumber, line := range strings.Split(prettyDiff, "\n") {
-					if strings.HasPrefix(line, diffAddPrefix) && strings.Contains(line, leak.Line) {
-						if _, ok := lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, lineNumber, leak.File)]; !ok {
-							lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, lineNumber, leak.File)] = true
-							leak.LineNumber = lineNumber + 1
-							if us.opts.Verbose {
-								logLeak(leak, us.opts.Redact)
-							}
-							scannerReport.Leaks = append(scannerReport.Leaks, leak)
-							break
-						}
+
+			for _, line := range strings.Split(diffContents, "\n") {
+				for _, rule := range us.cfg.Rules {
+					offender := rule.Inspect(line)
+					if offender == "" {
+						continue
+					}
+					if us.cfg.Allowlist.RegexAllowed(line) ||
+						rule.AllowList.FileAllowed(filepath.Base(filename)) ||
+						rule.AllowList.PathAllowed(filename) {
+						continue
+					}
+					if rule.File.String() != "" && !rule.HasFileLeak(filepath.Base(filename)) {
+						continue
+					}
+					if rule.Path.String() != "" && !rule.HasFilePathLeak(filepath.Base(filename)) {
+						continue
+					}
+					leak := NewLeak(line, offender, defaultLineNumber).WithCommit(emptyCommit())
+					leak.File = filename
+					leak.LineNumber = extractLine(prettyDiff, leak, lineLookup) + 1
+					leak.Repo = us.repoName
+					leak.Rule = rule.Description
+					leak.Tags = strings.Join(rule.Tags, ", ")
+					if us.opts.Verbose {
+						leak.Log(us.opts.Redact)
 					}
+					scannerReport.Leaks = append(scannerReport.Leaks, leak)
 				}
 			}
 		}
@@ -184,6 +221,7 @@ func diffPrettyText(diffs []diffmatchpatch.Diff) string {
 			_, _ = buff.WriteString("-")
 			_, _ = buff.WriteString(text)
 		case diffmatchpatch.DiffEqual:
+			_, _ = buff.WriteString(" ")
 			_, _ = buff.WriteString(text)
 		}
 	}

+ 1 - 284
scan/utils.go

@@ -2,20 +2,14 @@ package scan
 
 import (
 	"bufio"
-	"encoding/json"
 	"fmt"
-	"math"
 	"os"
 	"path/filepath"
-	"regexp"
 	"runtime"
 	"strconv"
 	"strings"
 	"time"
 
-	"github.com/zricethezav/gitleaks/v7/report"
-
-	"github.com/zricethezav/gitleaks/v7/config"
 	"github.com/zricethezav/gitleaks/v7/options"
 
 	"github.com/go-git/go-git/v5"
@@ -30,8 +24,6 @@ const (
 	diffDelPrefix     = "-"
 	diffLineSignature = " @@"
 	defaultLineNumber = 1
-
-	maxLineLen = 200
 )
 
 func obtainCommit(repo *git.Repository, commitSha string) (*object.Commit, error) {
@@ -128,128 +120,6 @@ func howManyThreads(threads int) int {
 	return threads
 }
 
-func shouldLog(scanner BaseScanner) bool {
-	if scanner.opts.Verbose && scanner.scannerType != typeRepoScanner &&
-		scanner.scannerType != typeCommitScanner &&
-		scanner.scannerType != typeUnstagedScanner &&
-		scanner.scannerType != typeNoGitScanner {
-		return true
-	}
-	return false
-}
-
-func checkRules(scanner BaseScanner, commit *object.Commit, repoName, filePath, content string) []report.Leak {
-	filename := filepath.Base(filePath)
-	path := filepath.Dir(filePath)
-	var leaks []report.Leak
-
-	skipRuleLookup := make(map[string]bool)
-	// First do simple rule checks based on filename
-	if skipCheck(scanner.cfg, filename, path) {
-		return leaks
-	}
-
-	for _, rule := range scanner.cfg.Rules {
-		if isCommitAllowListed(commit.Hash.String(), rule.AllowList.Commits) {
-			continue
-		}
-
-		if skipRule(rule, filename, filePath, commit.Hash.String()) {
-			skipRuleLookup[rule.Description] = true
-			continue
-		}
-
-		// If it doesnt contain a Content regex then it is a filename regex match
-		if !ruleContainRegex(rule) {
-			leak := report.Leak{
-				LineNumber: defaultLineNumber,
-				Line:       "",
-				Offender:   "Filename/path offender: " + filename,
-				Commit:     commit.Hash.String(),
-				Repo:       repoName,
-				RepoURL:    scanner.opts.RepoURL,
-				Message:    commit.Message,
-				Rule:       rule.Description,
-				Author:     commit.Author.Name,
-				Email:      commit.Author.Email,
-				Date:       commit.Author.When,
-				Tags:       strings.Join(rule.Tags, ", "),
-				File:       filePath,
-				// Operation:  diffOpToString(bundle.Operation),
-			}
-			leak.LeakURL = leakURL(leak)
-			if shouldLog(scanner) {
-				logLeak(leak, scanner.opts.Redact)
-			}
-			leaks = append(leaks, leak)
-		}
-	}
-
-	lineNumber := 1
-
-	for _, line := range strings.Split(content, "\n") {
-		for _, rule := range scanner.cfg.Rules {
-			if isCommitAllowListed(commit.Hash.String(), rule.AllowList.Commits) {
-				break
-			}
-			if _, ok := skipRuleLookup[rule.Description]; ok {
-				continue
-			}
-
-			offender := rule.Regex.FindString(line)
-			if offender == "" {
-				continue
-			}
-
-			// check entropy
-			groups := rule.Regex.FindStringSubmatch(offender)
-			if isAllowListed(line, append(rule.AllowList.Regexes, scanner.cfg.Allowlist.Regexes...)) {
-				continue
-			}
-			if len(rule.Entropies) != 0 && !trippedEntropy(groups, rule) {
-				continue
-			}
-
-			// 0 is a match for the full regex pattern
-			if 0 < rule.ReportGroup && rule.ReportGroup < len(groups) {
-				offender = groups[rule.ReportGroup]
-			}
-
-			leak := report.Leak{
-				LineNumber: lineNumber,
-				Line:       line,
-				Offender:   offender,
-				Commit:     commit.Hash.String(),
-				Repo:       repoName,
-				RepoURL:    scanner.opts.RepoURL,
-				Message:    commit.Message,
-				Rule:       rule.Description,
-				Author:     commit.Author.Name,
-				Email:      commit.Author.Email,
-				Date:       commit.Author.When,
-				Tags:       strings.Join(rule.Tags, ", "),
-				File:       filePath,
-			}
-			leak.LeakURL = leakURL(leak)
-			if shouldLog(scanner) {
-				logLeak(leak, scanner.opts.Redact)
-			}
-			leaks = append(leaks, leak)
-		}
-		lineNumber++
-	}
-	return leaks
-}
-
-func logLeak(leak report.Leak, redact bool) {
-	if redact {
-		leak = report.RedactLeak(leak)
-	}
-	var b []byte
-	b, _ = json.MarshalIndent(leak, "", "	")
-	fmt.Println(string(b))
-}
-
 // logOptions determines what log options are used when iterating through commits.
 // It is similar to `git log {branch}`. Default behavior is to log ALL branches so
 // gitleaks gets the full git history.
@@ -300,152 +170,6 @@ func logOptions(repo *git.Repository, opts options.Options) (*git.LogOptions, er
 	return &git.LogOptions{All: true}, nil
 }
 
-func skipCheck(cfg config.Config, filename string, path string) bool {
-	// We want to check if there is a allowlist for this file
-	if len(cfg.Allowlist.Files) != 0 {
-		for _, reFileName := range cfg.Allowlist.Files {
-			if regexMatched(filename, reFileName) {
-				log.Debugf("allowlisted file found, skipping scan of file: %s", filename)
-				return true
-			}
-		}
-	}
-
-	// We want to check if there is a allowlist for this path
-	if len(cfg.Allowlist.Paths) != 0 {
-		for _, reFilePath := range cfg.Allowlist.Paths {
-			if regexMatched(path, reFilePath) {
-				log.Debugf("file in allowlisted path found, skipping scan of file: %s", filename)
-				return true
-			}
-		}
-	}
-	return false
-}
-
-func skipRule(rule config.Rule, filename, path, commitSha string) bool {
-	// For each rule we want to check filename allowlists
-	if isAllowListed(filename, rule.AllowList.Files) || isAllowListed(path, rule.AllowList.Paths) {
-		return true
-	}
-
-	// If it has fileNameRegex and it doesnt match we continue to next rule
-	if ruleContainFileRegex(rule) && !regexMatched(filename, rule.File) {
-		return true
-	}
-
-	// If it has filePathRegex and it doesnt match we continue to next rule
-	if ruleContainPathRegex(rule) && !regexMatched(path, rule.Path) {
-		return true
-	}
-
-	return false
-}
-
-// regexMatched matched an interface to a regular expression. The interface f can
-// be a string type or go-git *object.File type.
-func regexMatched(f string, re *regexp.Regexp) bool {
-	if re == nil {
-		return false
-	}
-	if re.FindString(f) != "" {
-		return true
-	}
-	return false
-}
-
-// trippedEntropy checks if a given capture group or offender falls in between entropy ranges
-// supplied by a custom gitleaks configuration. Gitleaks do not check entropy by default.
-func trippedEntropy(groups []string, rule config.Rule) bool {
-	for _, e := range rule.Entropies {
-		if len(groups) > e.Group {
-			entropy := shannonEntropy(groups[e.Group])
-			if entropy >= e.Min && entropy <= e.Max {
-				return true
-			}
-		}
-	}
-	return false
-}
-
-// shannonEntropy calculates the entropy of data using the formula defined here:
-// https://en.wiktionary.org/wiki/Shannon_entropy
-// Another way to think about what this is doing is calculating the number of bits
-// needed to on average encode the data. So, the higher the entropy, the more random the data, the
-// more bits needed to encode that data.
-func shannonEntropy(data string) (entropy float64) {
-	if data == "" {
-		return 0
-	}
-
-	charCounts := make(map[rune]int)
-	for _, char := range data {
-		charCounts[char]++
-	}
-
-	invLength := 1.0 / float64(len(data))
-	for _, count := range charCounts {
-		freq := float64(count) * invLength
-		entropy -= freq * math.Log2(freq)
-	}
-
-	return entropy
-}
-
-// Checks if the given rule has a regex
-func ruleContainRegex(rule config.Rule) bool {
-	if rule.Regex == nil {
-		return false
-	}
-	if rule.Regex.String() == "" {
-		return false
-	}
-	return true
-}
-
-// Checks if the given rule has a file name regex
-func ruleContainFileRegex(rule config.Rule) bool {
-	if rule.File == nil {
-		return false
-	}
-	if rule.File.String() == "" {
-		return false
-	}
-	return true
-}
-
-// Checks if the given rule has a file path regex
-func ruleContainPathRegex(rule config.Rule) bool {
-	if rule.Path == nil {
-		return false
-	}
-	if rule.Path.String() == "" {
-		return false
-	}
-	return true
-}
-
-func isCommitAllowListed(commitHash string, allowlistedCommits []string) bool {
-	for _, hash := range allowlistedCommits {
-		if commitHash == hash {
-			return true
-		}
-	}
-	return false
-}
-
-func isAllowListed(target string, allowList []*regexp.Regexp) bool {
-	if len(allowList) != 0 {
-		for _, re := range allowList {
-			if re.FindString(target) != "" {
-				return true
-			}
-		}
-	}
-	return false
-
-}
-
 func optsToCommits(opts options.Options) ([]string, error) {
 	if opts.Commits != "" {
 		return strings.Split(opts.Commits, ","), nil
@@ -464,7 +188,7 @@ func optsToCommits(opts options.Options) ([]string, error) {
 	return commits, nil
 }
 
-func extractLine(patchContent string, leak report.Leak, lineLookup map[string]bool) int {
+func extractLine(patchContent string, leak Leak, lineLookup map[string]bool) int {
 	i := strings.Index(patchContent, fmt.Sprintf("\n+++ b/%s", leak.File))
 	filePatchContent := patchContent[i+1:]
 	i = strings.Index(filePatchContent, "diff --git")
@@ -494,10 +218,3 @@ func extractLine(patchContent string, leak report.Leak, lineLookup map[string]bo
 	}
 	return defaultLineNumber
 }
-
-func leakURL(leak report.Leak) string {
-	if leak.RepoURL != "" {
-		return fmt.Sprintf("%s/blob/%s/%s#L%d", leak.RepoURL, leak.Commit, leak.File, leak.LineNumber)
-	}
-	return ""
-}

+ 4 - 0
test_data/test_configs/onlyFiles.toml

@@ -0,0 +1,4 @@
+[[rules]]
+	description = "flag go"
+	file = '''(.*?)(md)$'''
+	tags = ["key", "extensions"]

+ 8 - 4
test_data/test_dir1_aws_leak.json

@@ -3,28 +3,32 @@
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
   "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
-  "commit": "0000000000000000000000000000000000000000",
+  "commit": "",
   "repo": "",
+  "repoURL": "",
+  "leakURL": "",
   "rule": "AWS Access Key",
   "commitMessage": "",
   "author": "",
   "email": "",
   "file": "../test_data/test_repos/test_dir_1/server.test.py",
-  "date": "1970-01-01T00:00:00Z",
+  "date": "0001-01-01T00:00:00Z",
   "tags": "key, AWS"
  },
  {
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
   "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
-  "commit": "0000000000000000000000000000000000000000",
+  "commit": "",
   "repo": "",
+  "repoURL": "",
+  "leakURL": "",
   "rule": "AWS Access Key",
   "commitMessage": "",
   "author": "",
   "email": "",
   "file": "../test_data/test_repos/test_dir_1/server.test2.py",
-  "date": "1970-01-01T00:00:00Z",
+  "date": "0001-01-01T00:00:00Z",
   "tags": "key, AWS"
  }
 ]

+ 4 - 2
test_data/test_file1_aws_leak.json

@@ -3,14 +3,16 @@
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
   "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
-  "commit": "0000000000000000000000000000000000000000",
+  "commit": "",
   "repo": "",
+  "repoURL": "",
+  "leakURL": "",
   "rule": "AWS Access Key",
   "commitMessage": "",
   "author": "",
   "email": "",
   "file": "../test_data/test_repos/test_dir_1/server.test.py",
-  "date": "1970-01-01T00:00:00Z",
+  "date": "0001-01-01T00:00:00Z",
   "tags": "key, AWS"
  }
 ]

+ 130 - 0
test_data/test_only_files_no_git.json

@@ -0,0 +1,130 @@
+[
+ {
+  "line": "",
+  "lineNumber": 1,
+  "offender": "Filename or path offender: ../test_data/test_repos/test_repo_2/no_secrets.md",
+  "commit": "",
+  "repo": "",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "flag go",
+  "commitMessage": "",
+  "author": "",
+  "email": "",
+  "file": "../test_data/test_repos/test_repo_2/no_secrets.md",
+  "date": "0001-01-01T00:00:00Z",
+  "tags": "key, extensions"
+ },
+ {
+  "line": "",
+  "lineNumber": 1,
+  "offender": "Filename or path offender: ../test_data/test_repos/test_repo_2/secrets.md",
+  "commit": "",
+  "repo": "",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "flag go",
+  "commitMessage": "",
+  "author": "",
+  "email": "",
+  "file": "../test_data/test_repos/test_repo_2/secrets.md",
+  "date": "0001-01-01T00:00:00Z",
+  "tags": "key, extensions"
+ },
+ {
+  "line": "",
+  "lineNumber": 1,
+  "offender": "Filename or path offender: ../test_data/test_repos/test_repo_3/no_secrets.md",
+  "commit": "",
+  "repo": "",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "flag go",
+  "commitMessage": "",
+  "author": "",
+  "email": "",
+  "file": "../test_data/test_repos/test_repo_3/no_secrets.md",
+  "date": "0001-01-01T00:00:00Z",
+  "tags": "key, extensions"
+ },
+ {
+  "line": "",
+  "lineNumber": 1,
+  "offender": "Filename or path offender: ../test_data/test_repos/test_repo_3/secrets.md",
+  "commit": "",
+  "repo": "",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "flag go",
+  "commitMessage": "",
+  "author": "",
+  "email": "",
+  "file": "../test_data/test_repos/test_repo_3/secrets.md",
+  "date": "0001-01-01T00:00:00Z",
+  "tags": "key, extensions"
+ },
+ {
+  "line": "",
+  "lineNumber": 1,
+  "offender": "Filename or path offender: ../test_data/test_repos/test_repo_4/no_secrets.md",
+  "commit": "",
+  "repo": "",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "flag go",
+  "commitMessage": "",
+  "author": "",
+  "email": "",
+  "file": "../test_data/test_repos/test_repo_4/no_secrets.md",
+  "date": "0001-01-01T00:00:00Z",
+  "tags": "key, extensions"
+ },
+ {
+  "line": "",
+  "lineNumber": 1,
+  "offender": "Filename or path offender: ../test_data/test_repos/test_repo_4/secrets.md",
+  "commit": "",
+  "repo": "",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "flag go",
+  "commitMessage": "",
+  "author": "",
+  "email": "",
+  "file": "../test_data/test_repos/test_repo_4/secrets.md",
+  "date": "0001-01-01T00:00:00Z",
+  "tags": "key, extensions"
+ },
+ {
+  "line": "",
+  "lineNumber": 1,
+  "offender": "Filename or path offender: ../test_data/test_repos/test_repo_5/notes.md",
+  "commit": "",
+  "repo": "",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "flag go",
+  "commitMessage": "",
+  "author": "",
+  "email": "",
+  "file": "../test_data/test_repos/test_repo_5/notes.md",
+  "date": "0001-01-01T00:00:00Z",
+  "tags": "key, extensions"
+ },
+ {
+  "line": "",
+  "lineNumber": 1,
+  "offender": "Filename or path offender: ../test_data/test_repos/test_repo_8/README.md",
+  "commit": "",
+  "repo": "",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "flag go",
+  "commitMessage": "",
+  "author": "",
+  "email": "",
+  "file": "../test_data/test_repos/test_repo_8/README.md",
+  "date": "0001-01-01T00:00:00Z",
+  "tags": "key, extensions"
+ }
+]