
Add baseline (#975)

* Add baseline

* Update doc, add error, move baseline to detect namespace, ignore findings instead of reactively filtering them out

* Update detect/detect.go

Co-authored-by: Zachary Rice <zricezrice@gmail.com>

* Update IsNew function (no check on tags - omit fingerprint check)

* Update README.md

Co-authored-by: Zachary Rice <zricezrice@gmail.com>

* Update examples in readme to make it clear that a baseline is indeed a gitleaks report

* Fix test - updated tags don't make a finding new

* Add missing err assignment

* Allow scanner to continue without baseline if file is malformed

* Fix typo in comment

* Fix control flow err. (Real life testing)

* Fix wording

* Auto-ignore baseline path
Gawan Schroeder 3 years ago
parent
commit
4f6ee2bcf1

+ 18 - 0
README.md

@@ -156,6 +156,7 @@ Flags:
       --redact                 redact secrets from logs and stdout
   -f, --report-format string   output format (json, csv, sarif)
   -r, --report-path string     report file
+  -b, --baseline-path          path to a previously generated report with known issues that gitleaks should ignore
   -s, --source string          path to source (git repo, directory, file)
   -v, --verbose                show verbose output from scan
 
 
@@ -190,6 +191,23 @@ as a pre-commit.
 
 
 **NOTE**: the `protect` command can only be used on git repos, running `protect` on files or directories will result in an error message.
 
 
+### Creating a baseline
+
+When scanning large repositories or repositories with a long history, it can be convenient to use a baseline. When using a baseline,
+gitleaks will ignore any old findings that are present in the baseline. A baseline can be any gitleaks report. To create a gitleaks report, run gitleaks with the `--report-path` parameter.
+
+```
+gitleaks detect --report-path gitleaks-report.json # This will save the report in a file called gitleaks-report.json
+```
+
+Once a baseline is created, it can be applied when running the `detect` command again:
+
+```
+gitleaks detect --baseline-path gitleaks-report.json --report-path findings.json
+```
+
+After running the `detect` command with the `--baseline-path` parameter, the report output (findings.json) will only contain new issues.
+
 ### Verify Findings
 
 
 You can verify a finding found by gitleaks using a `git log` command.

+ 9 - 0
cmd/detect.go

@@ -75,6 +75,15 @@ func runDetect(cmd *cobra.Command, args []string) {
 		detector.AddGitleaksIgnore(filepath.Join(source, ".gitleaksignore"))
 	}
 
 
+	// ignore findings from the baseline (an existing report in json format generated earlier)
+	baselinePath, _ := cmd.Flags().GetString("baseline-path")
+	if baselinePath != "" {
+		err = detector.AddBaseline(baselinePath)
+		if err != nil {
+			log.Error().Msgf("Could not load baseline. The path must point to a gitleaks report generated using the default format: %s", err)
+		}
+	}
+
 	// set exit code
 	exitCode, err := cmd.Flags().GetInt("exit-code")
 	if err != nil {

+ 1 - 0
cmd/root.go

@@ -42,6 +42,7 @@ func init() {
 	rootCmd.PersistentFlags().StringP("source", "s", ".", "path to source (default: $PWD)")
 	rootCmd.PersistentFlags().StringP("source", "s", ".", "path to source (default: $PWD)")
 	rootCmd.PersistentFlags().StringP("report-path", "r", "", "report file")
 	rootCmd.PersistentFlags().StringP("report-path", "r", "", "report file")
 	rootCmd.PersistentFlags().StringP("report-format", "f", "json", "output format (json, csv, sarif)")
 	rootCmd.PersistentFlags().StringP("report-format", "f", "json", "output format (json, csv, sarif)")
+	rootCmd.PersistentFlags().StringP("baseline-path", "b", "", "path to baseline with issues that can be ignored")
 	rootCmd.PersistentFlags().StringP("log-level", "l", "info", "log level (trace, debug, info, warn, error, fatal)")
 	rootCmd.PersistentFlags().StringP("log-level", "l", "info", "log level (trace, debug, info, warn, error, fatal)")
 	rootCmd.PersistentFlags().BoolP("verbose", "v", false, "show verbose output from scan")
 	rootCmd.PersistentFlags().BoolP("verbose", "v", false, "show verbose output from scan")
 	rootCmd.PersistentFlags().Bool("redact", false, "redact secrets from logs and stdout")
 	rootCmd.PersistentFlags().Bool("redact", false, "redact secrets from logs and stdout")

+ 58 - 0
detect/baseline.go

@@ -0,0 +1,58 @@
+package detect
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"os"
+
+	"github.com/zricethezav/gitleaks/v8/report"
+)
+
+func IsNew(finding report.Finding, baseline []report.Finding) bool {
+	// Explicitly testing each property as it gives significantly better performance in comparison to cmp.Equal(). The drawback is that
+	// the code requires maintenance if/when the Finding struct changes
+	for _, b := range baseline {
+
+		if finding.Author == b.Author &&
+			finding.Commit == b.Commit &&
+			finding.Date == b.Date &&
+			finding.Description == b.Description &&
+			finding.Email == b.Email &&
+			finding.EndColumn == b.EndColumn &&
+			finding.EndLine == b.EndLine &&
+			finding.Entropy == b.Entropy &&
+			finding.File == b.File &&
+			// Omit checking finding.Fingerprint - if the format of the fingerprint changes, the users will see unexpected behaviour
+			finding.Match == b.Match &&
+			finding.Message == b.Message &&
+			finding.RuleID == b.RuleID &&
+			finding.Secret == b.Secret &&
+			finding.StartColumn == b.StartColumn &&
+			finding.StartLine == b.StartLine {
+			return false
+		}
+	}
+	return true
+}
+
+func LoadBaseline(baselinePath string) ([]report.Finding, error) {
+	var previousFindings []report.Finding
+	jsonFile, err := os.Open(baselinePath)
+	if err != nil {
+		return nil, fmt.Errorf("could not open %s", baselinePath)
+	}
+
+	bytes, err := ioutil.ReadAll(jsonFile)
+	jsonFile.Close()
+	if err != nil {
+		return nil, fmt.Errorf("could not read data from the file %s", baselinePath)
+	}
+
+	err = json.Unmarshal(bytes, &previousFindings)
+	if err != nil {
+		return nil, fmt.Errorf("the format of the file %s is not supported", baselinePath)
+	}
+
+	return previousFindings, nil
+}
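The two exported helpers above, `LoadBaseline` and `IsNew`, can also be combined outside the `Detector`. A minimal sketch of that pattern (not part of this change; the report path and sample finding are made up for illustration):

```go
package main

import (
	"fmt"

	"github.com/zricethezav/gitleaks/v8/detect"
	"github.com/zricethezav/gitleaks/v8/report"
)

func main() {
	// Load a previously generated gitleaks JSON report as the baseline (illustrative path).
	baseline, err := detect.LoadBaseline("gitleaks-report.json")
	if err != nil {
		fmt.Println(err)
		return
	}

	// Findings from a newer scan; in practice these come from the detector.
	current := []report.Finding{
		{RuleID: "pypi-upload-token", Commit: "abc123", File: "setup.py", StartLine: 3},
	}

	// Keep only findings that are not already present in the baseline.
	var newFindings []report.Finding
	for _, f := range current {
		if detect.IsNew(f, baseline) {
			newFindings = append(newFindings, f)
		}
	}
	fmt.Printf("%d new finding(s)\n", len(newFindings))
}
```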

+ 137 - 0
detect/baseline_test.go

@@ -0,0 +1,137 @@
+package detect
+
+import (
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/zricethezav/gitleaks/v8/report"
+)
+
+func TestIsNew(t *testing.T) {
+	tests := []struct {
+		findings report.Finding
+		baseline []report.Finding
+		expect   bool
+	}{
+		{
+			findings: report.Finding{
+				Author: "a",
+				Commit: "0000",
+			},
+			baseline: []report.Finding{
+				{
+					Author: "a",
+					Commit: "0000",
+				},
+			},
+			expect: false,
+		},
+		{
+			findings: report.Finding{
+				Author: "a",
+				Commit: "0000",
+			},
+			baseline: []report.Finding{
+				{
+					Author: "a",
+					Commit: "0002",
+				},
+			},
+			expect: true,
+		},
+		{
+			findings: report.Finding{
+				Author: "a",
+				Commit: "0000",
+				Tags:   []string{"a", "b"},
+			},
+			baseline: []report.Finding{
+				{
+					Author: "a",
+					Commit: "0000",
+					Tags:   []string{"a", "c"},
+				},
+			},
+			expect: false, // Updated tags don't make it a new finding
+		},
+	}
+	for _, test := range tests {
+		assert.Equal(t, test.expect, IsNew(test.findings, test.baseline))
+	}
+}
+
+func TestFileLoadBaseline(t *testing.T) {
+	tests := []struct {
+		Filename      string
+		ExpectedError error
+	}{
+		{
+			Filename:      "../testdata/baseline/baseline.csv",
+			ExpectedError: errors.New("the format of the file ../testdata/baseline/baseline.csv is not supported"),
+		},
+		{
+			Filename:      "../testdata/baseline/baseline.sarif",
+			ExpectedError: errors.New("the format of the file ../testdata/baseline/baseline.sarif is not supported"),
+		},
+		{
+			Filename:      "../testdata/baseline/notfound.json",
+			ExpectedError: errors.New("could not open ../testdata/baseline/notfound.json"),
+		},
+	}
+
+	for _, test := range tests {
+		_, err := LoadBaseline(test.Filename)
+		assert.Equal(t, test.ExpectedError.Error(), err.Error())
+	}
+}
+
+func TestIgnoreIssuesInBaseline(t *testing.T) {
+	tests := []struct {
+		findings    []report.Finding
+		baseline    []report.Finding
+		expectCount int
+	}{
+		{
+			findings: []report.Finding{
+				{
+					Author: "a",
+					Commit: "5",
+				},
+			},
+			baseline: []report.Finding{
+				{
+					Author: "a",
+					Commit: "5",
+				},
+			},
+			expectCount: 0,
+		},
+		{
+			findings: []report.Finding{
+				{
+					Author:      "a",
+					Commit:      "5",
+					Fingerprint: "a",
+				},
+			},
+			baseline: []report.Finding{
+				{
+					Author:      "a",
+					Commit:      "5",
+					Fingerprint: "b",
+				},
+			},
+			expectCount: 0,
+		},
+	}
+
+	for _, test := range tests {
+		d, _ := NewDetectorDefaultConfig()
+		d.baseline = test.baseline
+		for _, finding := range test.findings {
+			d.addFinding(finding)
+		}
+		assert.Equal(t, test.expectCount, len(d.findings))
+	}
+}

+ 24 - 1
detect/detect.go

@@ -66,6 +66,12 @@ type Detector struct {
 	// matching given a set of words (keywords from the rules in the config)
 	prefilter ahocorasick.AhoCorasick
 
 
+	// a list of known findings that should be ignored
+	baseline []report.Finding
+
+	// path to baseline
+	baselinePath string
+
 	// gitleaksIgnore
 	gitleaksIgnore map[string]bool
 }
@@ -145,6 +151,18 @@ func (d *Detector) AddGitleaksIgnore(gitleaksIgnorePath string) error {
 	return nil
 }
 
 
+func (d *Detector) AddBaseline(baselinePath string) error {
+	if baselinePath != "" {
+		baseline, err := LoadBaseline(baselinePath)
+		if err != nil {
+			return err
+		}
+		d.baseline = baseline
+	}
+	d.baselinePath = baselinePath
+	return nil
+}
+
 // DetectBytes scans the given bytes and returns a list of findings
 func (d *Detector) DetectBytes(content []byte) []report.Finding {
 	return d.DetectString(string(content))
@@ -424,7 +442,7 @@ func (d *Detector) Detect(fragment Fragment) []report.Finding {
 
 
 	// check if filepath is allowed
 	if fragment.FilePath != "" && (d.Config.Allowlist.PathAllowed(fragment.FilePath) ||
-		fragment.FilePath == d.Config.Path) {
+		fragment.FilePath == d.Config.Path || (d.baselinePath != "" && fragment.FilePath == d.baselinePath)) {
 		return findings
 	}
 
 
@@ -473,6 +491,11 @@ func (d *Detector) addFinding(finding report.Finding) {
 		return
 	}
 
 
+	if d.baseline != nil && !IsNew(finding, d.baseline) {
+		log.Debug().Msgf("baseline duplicate -- ignoring finding with Fingerprint %s", finding.Fingerprint)
+		return
+	}
+
 	d.findingMutex.Lock()
 	d.findings = append(d.findings, finding)
 	if d.Verbose {
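With the detect.go changes above, a caller registers the baseline once and `addFinding` then drops baseline duplicates (logging them at debug level). A rough end-to-end sketch, assuming only the exported API visible in these diffs (`NewDetectorDefaultConfig`, `AddBaseline`, `DetectString`); the report path and the scanned string are made up for illustration:

```go
package main

import (
	"fmt"
	"log"

	"github.com/zricethezav/gitleaks/v8/detect"
)

func main() {
	detector, err := detect.NewDetectorDefaultConfig()
	if err != nil {
		log.Fatal(err)
	}

	// Register a previously generated JSON report (illustrative path);
	// findings already present in it will be ignored.
	if err := detector.AddBaseline("gitleaks-report.json"); err != nil {
		log.Fatal(err)
	}

	// Only findings that are not in the baseline are returned.
	findings := detector.DetectString(`token := "s3cr3t-example-value"`)
	fmt.Printf("%d new finding(s)\n", len(findings))
}
```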

+ 2 - 0
testdata/baseline/baseline.csv

@@ -0,0 +1,2 @@
+RuleID,Commit,File,Secret,Match,StartLine,EndLine,StartColumn,EndColumn,Author,Message,Date,Email,Fingerprint
+1,b,c,f,s,m,s,e,s,e,a,m,f,r,f

+ 40 - 0
testdata/baseline/baseline.json

@@ -0,0 +1,40 @@
+[
+ {
+  "Description": "PyPI upload token",
+  "StartLine": 32,
+  "EndLine": 32,
+  "StartColumn": 21,
+  "EndColumn": 106,
+  "Match": "************************",
+  "Secret": "************************",
+  "File": "detect/detect_test.go",
+  "Commit": "9326f35380636bcbe61e94b0584d1618c4b5c2c2",
+  "Entropy": 1.9606875,
+  "Author": "****",
+  "Email": "****",
+  "Date": "2022-03-07T14:33:06Z",
+  "Message": "Escape - character in regex character groups (#802)\n\n* fix char escape\n\n* add test\n\n* fix verbosity in make test",
+  "Tags": [],
+  "RuleID": "pypi-upload-token",
+  "Fingerprint": "9326f35380636bcbe61e94b0584d1618c4b5c2c2:detect/detect_test.go:pypi-upload-token:32"
+ },
+ {
+  "Description": "PyPI upload token",
+  "StartLine": 33,
+  "EndLine": 33,
+  "StartColumn": 21,
+  "EndColumn": 106,
+  "Match": "************************",
+  "Secret": "************************",
+  "File": "detect/detect_test.go",
+  "Commit": "9326f35380636bcbe61e94b0584d1618c4b5c2c2",
+  "Entropy": 1.9606875,
+  "Author": "****",
+  "Email": "****",
+  "Date": "2022-03-07T14:33:06Z",
+  "Message": "Escape - character in regex character groups (#802)\n\n* fix char escape\n\n* add test\n\n* fix verbosity in make test",
+  "Tags": [],
+  "RuleID": "pypi-upload-token",
+  "Fingerprint": "9326f35380636bcbe61e94b0584d1618c4b5c2c2:detect/detect_test.go:pypi-upload-token:33"
+ }
+]

+ 6 - 0
testdata/baseline/baseline.sarif

@@ -0,0 +1,6 @@
+{
+ "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
+ "version": "2.1.0",
+ "runs": [
+ ]
+}