Explorar el Código

Add support for following symlinks (#1010)

* Add support for following symlinks

* Update detect/detect.go

Co-authored-by: Zachary Rice <zricezrice@gmail.com>

* Update cmd/detect.go

Co-authored-by: Zachary Rice <zricezrice@gmail.com>
Ricky Grassmuck hace 3 años
padre
commit
6d801ed61c

+ 7 - 0
cmd/detect.go

@@ -18,6 +18,8 @@ func init() {
 	rootCmd.AddCommand(detectCmd)
 	rootCmd.AddCommand(detectCmd)
 	detectCmd.Flags().String("log-opts", "", "git log options")
 	detectCmd.Flags().String("log-opts", "", "git log options")
 	detectCmd.Flags().Bool("no-git", false, "treat git repo as a regular directory and scan those files, --log-opts has no effect on the scan when --no-git is set")
 	detectCmd.Flags().Bool("no-git", false, "treat git repo as a regular directory and scan those files, --log-opts has no effect on the scan when --no-git is set")
+	detectCmd.Flags().Bool("follow-symlinks", false, "Scan files that are symlinks to other files")
+
 }
 }
 
 
 var detectCmd = &cobra.Command{
 var detectCmd = &cobra.Command{
@@ -89,6 +91,11 @@ func runDetect(cmd *cobra.Command, args []string) {
 		}
 		}
 	}
 	}
 
 
+	// set follow symlinks flag
+	if detector.FollowSymlinks, err = cmd.Flags().GetBool("follow-symlinks"); err != nil {
+		log.Fatal().Err(err).Msg("")
+	}
+
 	// set exit code
 	// set exit code
 	exitCode, err := cmd.Flags().GetInt("exit-code")
 	exitCode, err := cmd.Flags().GetInt("exit-code")
 	if err != nil {
 	if err != nil {

+ 38 - 5
detect/detect.go

@@ -4,6 +4,7 @@ import (
 	"bufio"
 	"bufio"
 	"context"
 	"context"
 	"fmt"
 	"fmt"
+	"io/fs"
 	"os"
 	"os"
 	"path/filepath"
 	"path/filepath"
 	"regexp"
 	"regexp"
@@ -52,6 +53,9 @@ type Detector struct {
 	// files larger than this will be skipped
 	// files larger than this will be skipped
 	MaxTargetMegaBytes int
 	MaxTargetMegaBytes int
 
 
+	// FollowSymlinks is a flag to enable scanning symlink files
+	FollowSymlinks bool
+
 	// commitMap is used to keep track of commits that have been scanned.
 	// commitMap is used to keep track of commits that have been scanned.
 	// This is only used for logging purposes and git scans.
 	// This is only used for logging purposes and git scans.
 	commitMap map[string]bool
 	commitMap map[string]bool
@@ -85,7 +89,8 @@ type Fragment struct {
 	Raw string
 	Raw string
 
 
 	// FilePath is the path to the file if applicable
 	// FilePath is the path to the file if applicable
-	FilePath string
+	FilePath    string
+	SymlinkFile string
 
 
 	// CommitSHA is the SHA of the commit if applicable
 	// CommitSHA is the SHA of the commit if applicable
 	CommitSHA string
 	CommitSHA string
@@ -194,6 +199,7 @@ func (d *Detector) detectRule(fragment Fragment, rule config.Rule) []report.Find
 			finding := report.Finding{
 			finding := report.Finding{
 				Description: rule.Description,
 				Description: rule.Description,
 				File:        fragment.FilePath,
 				File:        fragment.FilePath,
+				SymlinkFile: fragment.SymlinkFile,
 				RuleID:      rule.RuleID,
 				RuleID:      rule.RuleID,
 				Match:       fmt.Sprintf("file detected: %s", fragment.FilePath),
 				Match:       fmt.Sprintf("file detected: %s", fragment.FilePath),
 				Tags:        rule.Tags,
 				Tags:        rule.Tags,
@@ -241,6 +247,7 @@ func (d *Detector) detectRule(fragment Fragment, rule config.Rule) []report.Find
 		finding := report.Finding{
 		finding := report.Finding{
 			Description: rule.Description,
 			Description: rule.Description,
 			File:        fragment.FilePath,
 			File:        fragment.FilePath,
+			SymlinkFile: fragment.SymlinkFile,
 			RuleID:      rule.RuleID,
 			RuleID:      rule.RuleID,
 			StartLine:   loc.startLine,
 			StartLine:   loc.startLine,
 			EndLine:     loc.endLine,
 			EndLine:     loc.endLine,
@@ -384,11 +391,16 @@ func (d *Detector) DetectGit(source string, logOpts string, gitScanType GitScanT
 	return d.findings, nil
 	return d.findings, nil
 }
 }
 
 
+type scanTarget struct {
+	Path    string
+	Symlink string
+}
+
 // DetectFiles accepts a path to a source directory or file and begins a scan of the
 // DetectFiles accepts a path to a source directory or file and begins a scan of the
 // file or directory.
 // file or directory.
 func (d *Detector) DetectFiles(source string) ([]report.Finding, error) {
 func (d *Detector) DetectFiles(source string) ([]report.Finding, error) {
 	s := semgroup.NewGroup(context.Background(), 4)
 	s := semgroup.NewGroup(context.Background(), 4)
-	paths := make(chan string)
+	paths := make(chan scanTarget)
 	s.Go(func() error {
 	s.Go(func() error {
 		defer close(paths)
 		defer close(paths)
 		return filepath.Walk(source,
 		return filepath.Walk(source,
@@ -403,7 +415,25 @@ func (d *Detector) DetectFiles(source string) ([]report.Finding, error) {
 					return nil
 					return nil
 				}
 				}
 				if fInfo.Mode().IsRegular() {
 				if fInfo.Mode().IsRegular() {
-					paths <- path
+					paths <- scanTarget{
+						Path:    path,
+						Symlink: "",
+					}
+				}
+				if fInfo.Mode().Type() == fs.ModeSymlink && d.FollowSymlinks {
+					realPath, err := filepath.EvalSymlinks(path)
+					if err != nil {
+						return err
+					}
+					realPathFileInfo, _ := os.Stat(realPath)
+					if realPathFileInfo.IsDir() {
+						log.Debug().Msgf("found symlinked directory: %s -> %s [skipping]", path, realPath)
+						return nil
+					}
+					paths <- scanTarget{
+						Path:    realPath,
+						Symlink: path,
+					}
 				}
 				}
 				return nil
 				return nil
 			})
 			})
@@ -411,7 +441,7 @@ func (d *Detector) DetectFiles(source string) ([]report.Finding, error) {
 	for pa := range paths {
 	for pa := range paths {
 		p := pa
 		p := pa
 		s.Go(func() error {
 		s.Go(func() error {
-			b, err := os.ReadFile(p)
+			b, err := os.ReadFile(p.Path)
 			if err != nil {
 			if err != nil {
 				return err
 				return err
 			}
 			}
@@ -426,7 +456,10 @@ func (d *Detector) DetectFiles(source string) ([]report.Finding, error) {
 
 
 			fragment := Fragment{
 			fragment := Fragment{
 				Raw:      string(b),
 				Raw:      string(b),
-				FilePath: p,
+				FilePath: p.Path,
+			}
+			if p.Symlink != "" {
+				fragment.SymlinkFile = p.Symlink
 			}
 			}
 			for _, finding := range d.Detect(fragment) {
 			for _, finding := range d.Detect(fragment) {
 				// need to add 1 since line counting starts at 1
 				// need to add 1 since line counting starts at 1

+ 57 - 0
detect/detect_test.go

@@ -491,6 +491,7 @@ func TestFromFiles(t *testing.T) {
 					Secret:      "AKIALALEMEL33243OLIA",
 					Secret:      "AKIALALEMEL33243OLIA",
 					Line:        "\n\tawsToken := \"AKIALALEMEL33243OLIA\"",
 					Line:        "\n\tawsToken := \"AKIALALEMEL33243OLIA\"",
 					File:        "../testdata/repos/nogit/main.go",
 					File:        "../testdata/repos/nogit/main.go",
+					SymlinkFile: "",
 					RuleID:      "aws-access-key",
 					RuleID:      "aws-access-key",
 					Tags:        []string{"key", "AWS"},
 					Tags:        []string{"key", "AWS"},
 					Entropy:     3.0841837,
 					Entropy:     3.0841837,
@@ -537,6 +538,7 @@ func TestFromFiles(t *testing.T) {
 		}
 		}
 		cfg, _ := vc.Translate()
 		cfg, _ := vc.Translate()
 		detector := NewDetector(cfg)
 		detector := NewDetector(cfg)
+		detector.FollowSymlinks = true
 		findings, err := detector.DetectFiles(tt.source)
 		findings, err := detector.DetectFiles(tt.source)
 		if err != nil {
 		if err != nil {
 			t.Error(err)
 			t.Error(err)
@@ -546,6 +548,61 @@ func TestFromFiles(t *testing.T) {
 	}
 	}
 }
 }
 
 
+func TestDetectWithSymlinks(t *testing.T) {
+	tests := []struct {
+		cfgName          string
+		source           string
+		expectedFindings []report.Finding
+	}{
+		{
+			source:  filepath.Join(repoBasePath, "symlinks/file_symlink"),
+			cfgName: "simple",
+			expectedFindings: []report.Finding{
+				{
+					Description: "Asymmetric Private Key",
+					StartLine:   1,
+					EndLine:     1,
+					StartColumn: 1,
+					EndColumn:   35,
+					Match:       "-----BEGIN OPENSSH PRIVATE KEY-----",
+					Secret:      "-----BEGIN OPENSSH PRIVATE KEY-----",
+					Line:        "-----BEGIN OPENSSH PRIVATE KEY-----",
+					File:        "../testdata/repos/symlinks/source_file/id_ed25519",
+					SymlinkFile: "../testdata/repos/symlinks/file_symlink/symlinked_id_ed25519",
+					RuleID:      "apkey",
+					Tags:        []string{"key", "AsymmetricPrivateKey"},
+					Entropy:     3.587164,
+					Fingerprint: "../testdata/repos/symlinks/source_file/id_ed25519:apkey:1",
+				},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		viper.AddConfigPath(configPath)
+		viper.SetConfigName("simple")
+		viper.SetConfigType("toml")
+		err := viper.ReadInConfig()
+		if err != nil {
+			t.Error(err)
+		}
+
+		var vc config.ViperConfig
+		err = viper.Unmarshal(&vc)
+		if err != nil {
+			t.Error(err)
+		}
+		cfg, _ := vc.Translate()
+		detector := NewDetector(cfg)
+		detector.FollowSymlinks = true
+		findings, err := detector.DetectFiles(tt.source)
+		if err != nil {
+			t.Error(err)
+		}
+		assert.ElementsMatch(t, tt.expectedFindings, findings)
+	}
+}
+
 func moveDotGit(from, to string) error {
 func moveDotGit(from, to string) error {
 	repoDirs, err := os.ReadDir("../testdata/repos")
 	repoDirs, err := os.ReadDir("../testdata/repos")
 	if err != nil {
 	if err != nil {

+ 2 - 0
report/csv.go

@@ -16,6 +16,7 @@ func writeCsv(f []Finding, w io.WriteCloser) error {
 	err := cw.Write([]string{"RuleID",
 	err := cw.Write([]string{"RuleID",
 		"Commit",
 		"Commit",
 		"File",
 		"File",
+		"SymlinkFile",
 		"Secret",
 		"Secret",
 		"Match",
 		"Match",
 		"StartLine",
 		"StartLine",
@@ -35,6 +36,7 @@ func writeCsv(f []Finding, w io.WriteCloser) error {
 		err = cw.Write([]string{f.RuleID,
 		err = cw.Write([]string{f.RuleID,
 			f.Commit,
 			f.Commit,
 			f.File,
 			f.File,
+			f.SymlinkFile,
 			f.Secret,
 			f.Secret,
 			f.Match,
 			f.Match,
 			strconv.Itoa(f.StartLine),
 			strconv.Itoa(f.StartLine),

+ 1 - 0
report/csv_test.go

@@ -28,6 +28,7 @@ func TestWriteCSV(t *testing.T) {
 					EndColumn:   2,
 					EndColumn:   2,
 					Message:     "opps",
 					Message:     "opps",
 					File:        "auth.py",
 					File:        "auth.py",
+					SymlinkFile: "",
 					Commit:      "0000000000000000",
 					Commit:      "0000000000000000",
 					Author:      "John Doe",
 					Author:      "John Doe",
 					Email:       "johndoe@gmail.com",
 					Email:       "johndoe@gmail.com",

+ 3 - 3
report/finding.go

@@ -22,9 +22,9 @@ type Finding struct {
 	Secret string
 	Secret string
 
 
 	// File is the name of the file containing the finding
 	// File is the name of the file containing the finding
-	File string
-
-	Commit string
+	File        string
+	SymlinkFile string
+	Commit      string
 
 
 	// Entropy is the shannon entropy of Value
 	// Entropy is the shannon entropy of Value
 	Entropy float32
 	Entropy float32

+ 1 - 0
report/json_test.go

@@ -30,6 +30,7 @@ func TestWriteJSON(t *testing.T) {
 					EndColumn:   2,
 					EndColumn:   2,
 					Message:     "opps",
 					Message:     "opps",
 					File:        "auth.py",
 					File:        "auth.py",
+					SymlinkFile: "",
 					Commit:      "0000000000000000",
 					Commit:      "0000000000000000",
 					Author:      "John Doe",
 					Author:      "John Doe",
 					Email:       "johndoe@gmail.com",
 					Email:       "johndoe@gmail.com",

+ 5 - 1
report/sarif.go

@@ -109,11 +109,15 @@ func getResults(findings []Finding) []Results {
 }
 }
 
 
 func getLocation(f Finding) []Locations {
 func getLocation(f Finding) []Locations {
+	uri := f.File
+	if f.SymlinkFile != "" {
+		uri = f.SymlinkFile
+	}
 	return []Locations{
 	return []Locations{
 		{
 		{
 			PhysicalLocation: PhysicalLocation{
 			PhysicalLocation: PhysicalLocation{
 				ArtifactLocation: ArtifactLocation{
 				ArtifactLocation: ArtifactLocation{
-					URI: f.File,
+					URI: uri,
 				},
 				},
 				Region: Region{
 				Region: Region{
 					StartLine:   f.StartLine,
 					StartLine:   f.StartLine,

+ 2 - 2
testdata/expected/report/csv_simple.csv

@@ -1,2 +1,2 @@
-RuleID,Commit,File,Secret,Match,StartLine,EndLine,StartColumn,EndColumn,Author,Message,Date,Email,Fingerprint
-test-rule,0000000000000000,auth.py,a secret,line containing secret,1,2,1,2,John Doe,opps,10-19-2003,johndoe@gmail.com,fingerprint
+RuleID,Commit,File,SymlinkFile,Secret,Match,StartLine,EndLine,StartColumn,EndColumn,Author,Message,Date,Email,Fingerprint
+test-rule,0000000000000000,auth.py,,a secret,line containing secret,1,2,1,2,John Doe,opps,10-19-2003,johndoe@gmail.com,fingerprint

+ 1 - 0
testdata/expected/report/json_simple.json

@@ -8,6 +8,7 @@
   "Match": "line containing secret",
   "Match": "line containing secret",
   "Secret": "a secret",
   "Secret": "a secret",
   "File": "auth.py",
   "File": "auth.py",
+  "SymlinkFile": "",
   "Commit": "0000000000000000",
   "Commit": "0000000000000000",
   "Entropy": 0,
   "Entropy": 0,
   "Author": "John Doe",
   "Author": "John Doe",

+ 1 - 0
testdata/repos/symlinks/file_symlink/symlinked_id_ed25519

@@ -0,0 +1 @@
+../source_file/id_ed25519

+ 7 - 0
testdata/repos/symlinks/source_file/id_ed25519

@@ -0,0 +1,7 @@
+-----BEGIN OPENSSH PRIVATE KEY-----
+b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW
+QyNTUxOQAAACA8YWKYztuuvxUIMomc3zv0OdXCT57Cc2cRYu3TMbX9XAAAAJDiKO3C4ijt
+wgAAAAtzc2gtZWQyNTUxOQAAACA8YWKYztuuvxUIMomc3zv0OdXCT57Cc2cRYu3TMbX9XA
+AAAECzmj8DGxg5YHtBK4AmBttMXDQHsPAaCyYHQjJ4YujRBTxhYpjO266/FQgyiZzfO/Q5
+1cJPnsJzZxFi7dMxtf1cAAAADHJvb3RAZGV2aG9zdAE=
+-----END OPENSSH PRIVATE KEY-----