
Add baseline (#975)

* Add baseline

* Update doc, add error, move baseline to detect namespace, ignore findings instead of reactively filtering them out

* Update detect/detect.go

Co-authored-by: Zachary Rice <zricezrice@gmail.com>

* Update IsNew function (no check on tags - omit fingerprint check)

* Update README.md

Co-authored-by: Zachary Rice <zricezrice@gmail.com>

* Update examples in readme to make it clear that a baseline is indeed a gitleaks report

* Fix test - updated tags don't make a finding new

* Add missing err assignment

* Allow scanner to continue without baseline if file is malformed

* Fix typo in comment

* Fix control-flow error (found during real-life testing)

* Fix wording

* Auto-ignore baseline path
Gawan Schroeder, 3 years ago
commit 4f6ee2bcf1

+ 18 - 0
README.md

@@ -156,6 +156,7 @@ Flags:
       --redact                 redact secrets from logs and stdout
   -f, --report-format string   output format (json, csv, sarif)
   -r, --report-path string     report file
+  -b, --baseline-path string   path to a previously generated report with known issues that gitleaks should ignore
   -s, --source string          path to source (git repo, directory, file)
   -v, --verbose                show verbose output from scan
 
@@ -190,6 +191,23 @@ as a pre-commit.
 
 **NOTE**: the `protect` command can only be used on git repos, running `protect` on files or directories will result in an error message.
 
+### Creating a baseline
+
+When scanning large repositories or repositories with a long history, it can be convenient to use a baseline. When using a baseline, 
+gitleaks will ignore any old findings that are present in the baseline. A baseline can be any gitleaks report generated in the default JSON format. To create such a report, run gitleaks with the `--report-path` parameter. 
+
+```
+gitleaks detect --report-path gitleaks-report.json # This will save the report in a file called gitleaks-report.json
+```
+
+Once a baseline is created, it can be applied when running the detect command again:
+
+```
+gitleaks detect --baseline-path gitleaks-report.json --report-path findings.json
+```
+
+After running the detect command with the `--baseline-path` parameter, the report output (findings.json) will only contain new issues.
+
 ### Verify Findings
 
 You can verify a finding found by gitleaks using a `git log` command.

+ 9 - 0
cmd/detect.go

@@ -75,6 +75,15 @@ func runDetect(cmd *cobra.Command, args []string) {
 		detector.AddGitleaksIgnore(filepath.Join(source, ".gitleaksignore"))
 	}
 
+	// ignore findings from the baseline (an existing report in json format generated earlier)
+	baselinePath, _ := cmd.Flags().GetString("baseline-path")
+	if baselinePath != "" {
+		err = detector.AddBaseline(baselinePath)
+		if err != nil {
+			log.Error().Msgf("Could not load baseline. The path must point to a gitleaks report generated using the default format: %s", err)
+		}
+	}
+
 	// set exit code
 	exitCode, err := cmd.Flags().GetInt("exit-code")
 	if err != nil {

+ 1 - 0
cmd/root.go

@@ -42,6 +42,7 @@ func init() {
 	rootCmd.PersistentFlags().StringP("source", "s", ".", "path to source (default: $PWD)")
 	rootCmd.PersistentFlags().StringP("report-path", "r", "", "report file")
 	rootCmd.PersistentFlags().StringP("report-format", "f", "json", "output format (json, csv, sarif)")
+	rootCmd.PersistentFlags().StringP("baseline-path", "b", "", "path to baseline with issues that can be ignored")
 	rootCmd.PersistentFlags().StringP("log-level", "l", "info", "log level (trace, debug, info, warn, error, fatal)")
 	rootCmd.PersistentFlags().BoolP("verbose", "v", false, "show verbose output from scan")
 	rootCmd.PersistentFlags().Bool("redact", false, "redact secrets from logs and stdout")

+ 58 - 0
detect/baseline.go

@@ -0,0 +1,58 @@
+package detect
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"os"
+
+	"github.com/zricethezav/gitleaks/v8/report"
+)
+
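+// IsNew reports whether a finding is not already present in the baseline.
+// Tags and Fingerprint are intentionally excluded from the comparison.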
+func IsNew(finding report.Finding, baseline []report.Finding) bool {
+	// Explicitly testing each property as it gives significantly better performance in comparison to cmp.Equal(). Drawback is that
+	// the code requires maintenance if/when the Finding struct changes
+	for _, b := range baseline {
+
+		if finding.Author == b.Author &&
+			finding.Commit == b.Commit &&
+			finding.Date == b.Date &&
+			finding.Description == b.Description &&
+			finding.Email == b.Email &&
+			finding.EndColumn == b.EndColumn &&
+			finding.EndLine == b.EndLine &&
+			finding.Entropy == b.Entropy &&
+			finding.File == b.File &&
+			// Omit checking finding.Fingerprint - if the format of the fingerprint changes, the users will see unexpected behaviour
+			finding.Match == b.Match &&
+			finding.Message == b.Message &&
+			finding.RuleID == b.RuleID &&
+			finding.Secret == b.Secret &&
+			finding.StartColumn == b.StartColumn &&
+			finding.StartLine == b.StartLine {
+			return false
+		}
+	}
+	return true
+}
+
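+// LoadBaseline reads a previously generated gitleaks report in the default
+// JSON format and returns the findings it contains.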
+func LoadBaseline(baselinePath string) ([]report.Finding, error) {
+	var previousFindings []report.Finding
+	jsonFile, err := os.Open(baselinePath)
+	if err != nil {
+		return nil, fmt.Errorf("could not open %s", baselinePath)
+	}
+
+	bytes, err := ioutil.ReadAll(jsonFile)
+	jsonFile.Close()
+	if err != nil {
+		return nil, fmt.Errorf("could not read data from the file %s", baselinePath)
+	}
+
+	err = json.Unmarshal(bytes, &previousFindings)
+	if err != nil {
+		return nil, fmt.Errorf("the format of the file %s is not supported", baselinePath)
+	}
+
+	return previousFindings, nil
+}
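
To illustrate how the exported helpers above compose, here is a minimal sketch (not part of this commit) that filters a fresh scan's findings against a loaded baseline; the report path and the `current` slice are hypothetical placeholders.

```
package main

import (
	"fmt"
	"log"

	"github.com/zricethezav/gitleaks/v8/detect"
	"github.com/zricethezav/gitleaks/v8/report"
)

func main() {
	// Load a previously generated gitleaks JSON report to use as the baseline.
	baseline, err := detect.LoadBaseline("gitleaks-report.json")
	if err != nil {
		log.Fatal(err)
	}

	// current would normally come from a scan; left empty here for illustration.
	var current []report.Finding

	// Keep only findings that are not already present in the baseline.
	var newFindings []report.Finding
	for _, f := range current {
		if detect.IsNew(f, baseline) {
			newFindings = append(newFindings, f)
		}
	}
	fmt.Printf("%d new finding(s)\n", len(newFindings))
}
```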

+ 137 - 0
detect/baseline_test.go

@@ -0,0 +1,137 @@
+package detect
+
+import (
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/zricethezav/gitleaks/v8/report"
+)
+
+func TestIsNew(t *testing.T) {
+	tests := []struct {
+		findings report.Finding
+		baseline []report.Finding
+		expect   bool
+	}{
+		{
+			findings: report.Finding{
+				Author: "a",
+				Commit: "0000",
+			},
+			baseline: []report.Finding{
+				{
+					Author: "a",
+					Commit: "0000",
+				},
+			},
+			expect: false,
+		},
+		{
+			findings: report.Finding{
+				Author: "a",
+				Commit: "0000",
+			},
+			baseline: []report.Finding{
+				{
+					Author: "a",
+					Commit: "0002",
+				},
+			},
+			expect: true,
+		},
+		{
+			findings: report.Finding{
+				Author: "a",
+				Commit: "0000",
+				Tags:   []string{"a", "b"},
+			},
+			baseline: []report.Finding{
+				{
+					Author: "a",
+					Commit: "0000",
+					Tags:   []string{"a", "c"},
+				},
+			},
+			expect: false, // Updated tags don't make it a new finding
+		},
+	}
+	for _, test := range tests {
+		assert.Equal(t, test.expect, IsNew(test.findings, test.baseline))
+	}
+}
+
+func TestFileLoadBaseline(t *testing.T) {
+	tests := []struct {
+		Filename      string
+		ExpectedError error
+	}{
+		{
+			Filename:      "../testdata/baseline/baseline.csv",
+			ExpectedError: errors.New("the format of the file ../testdata/baseline/baseline.csv is not supported"),
+		},
+		{
+			Filename:      "../testdata/baseline/baseline.sarif",
+			ExpectedError: errors.New("the format of the file ../testdata/baseline/baseline.sarif is not supported"),
+		},
+		{
+			Filename:      "../testdata/baseline/notfound.json",
+			ExpectedError: errors.New("could not open ../testdata/baseline/notfound.json"),
+		},
+	}
+
+	for _, test := range tests {
+		_, err := LoadBaseline(test.Filename)
+		assert.Equal(t, test.ExpectedError.Error(), err.Error())
+	}
+}
+
+func TestIgnoreIssuesInBaseline(t *testing.T) {
+	tests := []struct {
+		findings    []report.Finding
+		baseline    []report.Finding
+		expectCount int
+	}{
+		{
+			findings: []report.Finding{
+				{
+					Author: "a",
+					Commit: "5",
+				},
+			},
+			baseline: []report.Finding{
+				{
+					Author: "a",
+					Commit: "5",
+				},
+			},
+			expectCount: 0,
+		},
+		{
+			findings: []report.Finding{
+				{
+					Author:      "a",
+					Commit:      "5",
+					Fingerprint: "a",
+				},
+			},
+			baseline: []report.Finding{
+				{
+					Author:      "a",
+					Commit:      "5",
+					Fingerprint: "b",
+				},
+			},
+			expectCount: 0,
+		},
+	}
+
+	for _, test := range tests {
+		d, _ := NewDetectorDefaultConfig()
+		d.baseline = test.baseline
+		for _, finding := range test.findings {
+			d.addFinding(finding)
+		}
+		assert.Equal(t, test.expectCount, len(d.findings))
+	}
+}

+ 24 - 1
detect/detect.go

@@ -66,6 +66,12 @@ type Detector struct {
 	// matching given a set of words (keywords from the rules in the config)
 	prefilter ahocorasick.AhoCorasick
 
+	// a list of known findings that should be ignored
+	baseline []report.Finding
+
+	// path to baseline
+	baselinePath string
+
 	// gitleaksIgnore
 	gitleaksIgnore map[string]bool
 }
@@ -145,6 +151,18 @@ func (d *Detector) AddGitleaksIgnore(gitleaksIgnorePath string) error {
 	return nil
 }
 
+// AddBaseline loads a gitleaks report from baselinePath and records its
+// findings; matching findings are then ignored in subsequent scans.
+func (d *Detector) AddBaseline(baselinePath string) error {
+	if baselinePath != "" {
+		baseline, err := LoadBaseline(baselinePath)
+		if err != nil {
+			return err
+		}
+		d.baseline = baseline
+	}
+	d.baselinePath = baselinePath
+	return nil
+}
+
 // DetectBytes scans the given bytes and returns a list of findings
 func (d *Detector) DetectBytes(content []byte) []report.Finding {
 	return d.DetectString(string(content))
@@ -424,7 +442,7 @@ func (d *Detector) Detect(fragment Fragment) []report.Finding {
 
 	// check if filepath is allowed
 	if fragment.FilePath != "" && (d.Config.Allowlist.PathAllowed(fragment.FilePath) ||
-		fragment.FilePath == d.Config.Path) {
+		fragment.FilePath == d.Config.Path || (d.baselinePath != "" && fragment.FilePath == d.baselinePath)) {
 		return findings
 	}
 
@@ -473,6 +491,11 @@ func (d *Detector) addFinding(finding report.Finding) {
 		return
 	}
 
+	if d.baseline != nil && !IsNew(finding, d.baseline) {
+		log.Debug().Msgf("baseline duplicate -- ignoring finding with Fingerprint %s", finding.Fingerprint)
+		return
+	}
+
 	d.findingMutex.Lock()
 	d.findings = append(d.findings, finding)
 	if d.Verbose {
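
As a usage note for the `Detector` changes above, the following is a hedged sketch (not part of this commit) of calling the new `AddBaseline` method programmatically, assuming the exported `NewDetectorDefaultConfig`, `AddBaseline`, and `DetectString` API shown in this diff; the report path and scanned string are hypothetical.

```
package main

import (
	"fmt"
	"log"

	"github.com/zricethezav/gitleaks/v8/detect"
)

func main() {
	detector, err := detect.NewDetectorDefaultConfig()
	if err != nil {
		log.Fatal(err)
	}

	// Findings listed in the baseline report are skipped by addFinding,
	// so only new findings are returned by the scan.
	if err := detector.AddBaseline("gitleaks-report.json"); err != nil {
		// Mirrors the CLI behaviour: the scan can continue without a baseline.
		log.Printf("could not load baseline: %v", err)
	}

	findings := detector.DetectString(`token := "hypothetical-secret-value"`)
	fmt.Printf("%d new finding(s)\n", len(findings))
}
```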

+ 2 - 0
testdata/baseline/baseline.csv

@@ -0,0 +1,2 @@
+RuleID,Commit,File,Secret,Match,StartLine,EndLine,StartColumn,EndColumn,Author,Message,Date,Email,Fingerprint
+1,b,c,f,s,m,s,e,s,e,a,m,f,r,f

+ 40 - 0
testdata/baseline/baseline.json

@@ -0,0 +1,40 @@
+[
+ {
+  "Description": "PyPI upload token",
+  "StartLine": 32,
+  "EndLine": 32,
+  "StartColumn": 21,
+  "EndColumn": 106,
+  "Match": "************************",
+  "Secret": "************************",
+  "File": "detect/detect_test.go",
+  "Commit": "9326f35380636bcbe61e94b0584d1618c4b5c2c2",
+  "Entropy": 1.9606875,
+  "Author": "****",
+  "Email": "****",
+  "Date": "2022-03-07T14:33:06Z",
+  "Message": "Escape - character in regex character groups (#802)\n\n* fix char escape\n\n* add test\n\n* fix verbosity in make test",
+  "Tags": [],
+  "RuleID": "pypi-upload-token",
+  "Fingerprint": "9326f35380636bcbe61e94b0584d1618c4b5c2c2:detect/detect_test.go:pypi-upload-token:32"
+ },
+ {
+  "Description": "PyPI upload token",
+  "StartLine": 33,
+  "EndLine": 33,
+  "StartColumn": 21,
+  "EndColumn": 106,
+  "Match": "************************",
+  "Secret": "************************",
+  "File": "detect/detect_test.go",
+  "Commit": "9326f35380636bcbe61e94b0584d1618c4b5c2c2",
+  "Entropy": 1.9606875,
+  "Author": "****",
+  "Email": "****",
+  "Date": "2022-03-07T14:33:06Z",
+  "Message": "Escape - character in regex character groups (#802)\n\n* fix char escape\n\n* add test\n\n* fix verbosity in make test",
+  "Tags": [],
+  "RuleID": "pypi-upload-token",
+  "Fingerprint": "9326f35380636bcbe61e94b0584d1618c4b5c2c2:detect/detect_test.go:pypi-upload-token:33"
+ }
+]

+ 6 - 0
testdata/baseline/baseline.sarif

@@ -0,0 +1,6 @@
+{
+ "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
+ "version": "2.1.0",
+ "runs": [
+ ]
+}