View Source

fix(dir): skip opening file&dir if allowlist matches (#1653)

Richard Gomez 1 year ago
parent
commit
ed205a5f63

+ 10 - 6
cmd/detect.go

@@ -62,6 +62,10 @@ func runDetect(cmd *cobra.Command, args []string) {
 
 
 	detector := Detector(cmd, cfg, source)
 	detector := Detector(cmd, cfg, source)
 
 
+	// set follow symlinks flag
+	if detector.FollowSymlinks, err = cmd.Flags().GetBool("follow-symlinks"); err != nil {
+		log.Fatal().Err(err).Msg("")
+	}
 	// set exit code
 	// set exit code
 	exitCode, err := cmd.Flags().GetInt("exit-code")
 	exitCode, err := cmd.Flags().GetInt("exit-code")
 	if err != nil {
 	if err != nil {
@@ -83,7 +87,12 @@ func runDetect(cmd *cobra.Command, args []string) {
 	// start the detector scan
 	// start the detector scan
 	if noGit {
 	if noGit {
 		var paths <-chan sources.ScanTarget
 		var paths <-chan sources.ScanTarget
-		paths, err = sources.DirectoryTargets(source, detector.Sema, detector.FollowSymlinks)
+		paths, err = sources.DirectoryTargets(
+			source,
+			detector.Sema,
+			detector.FollowSymlinks,
+			detector.Config.Allowlist.PathAllowed,
+		)
 		if err != nil {
 		if err != nil {
 			log.Fatal().Err(err)
 			log.Fatal().Err(err)
 		}
 		}
@@ -120,10 +129,5 @@ func runDetect(cmd *cobra.Command, args []string) {
 		}
 		}
 	}
 	}
 
 
-	// set follow symlinks flag
-	if detector.FollowSymlinks, err = cmd.Flags().GetBool("follow-symlinks"); err != nil {
-		log.Fatal().Err(err).Msg("")
-	}
-
 	findingSummaryAndExit(findings, cmd, cfg, exitCode, start, err)
 	findingSummaryAndExit(findings, cmd, cfg, exitCode, start, err)
 }
 }

+ 10 - 1
cmd/directory.go

@@ -45,6 +45,10 @@ func runDirectory(cmd *cobra.Command, args []string) {
 
 
 	detector := Detector(cmd, cfg, source)
 	detector := Detector(cmd, cfg, source)
 
 
+	// set follow symlinks flag
+	if detector.FollowSymlinks, err = cmd.Flags().GetBool("follow-symlinks"); err != nil {
+		log.Fatal().Err(err).Msg("")
+	}
 	// set exit code
 	// set exit code
 	exitCode, err := cmd.Flags().GetInt("exit-code")
 	exitCode, err := cmd.Flags().GetInt("exit-code")
 	if err != nil {
 	if err != nil {
@@ -52,7 +56,12 @@ func runDirectory(cmd *cobra.Command, args []string) {
 	}
 	}
 
 
 	var paths <-chan sources.ScanTarget
 	var paths <-chan sources.ScanTarget
-	paths, err = sources.DirectoryTargets(source, detector.Sema, detector.FollowSymlinks)
+	paths, err = sources.DirectoryTargets(
+		source,
+		detector.Sema,
+		detector.FollowSymlinks,
+		detector.Config.Allowlist.PathAllowed,
+	)
 	if err != nil {
 	if err != nil {
 		log.Fatal().Err(err)
 		log.Fatal().Err(err)
 	}
 	}

+ 9 - 12
cmd/generate/config/base/config.go

@@ -64,7 +64,7 @@ func CreateGlobalConfig() config.Config {
 				// ----------- Golang files -----------
 				// ----------- Golang files -----------
 				regexp.MustCompile(`go\.(mod|sum|work(\.sum)?)$`),
 				regexp.MustCompile(`go\.(mod|sum|work(\.sum)?)$`),
 				regexp.MustCompile(`(^|/)vendor/modules\.txt$`),
 				regexp.MustCompile(`(^|/)vendor/modules\.txt$`),
-				regexp.MustCompile(`(^|/)vendor/(github\.com|golang\.org/x|google\.golang\.org|gopkg\.in|istio\.io|k8s\.io|sigs\.k8s\.io)/.*$`),
+				regexp.MustCompile(`(^|/)vendor/(github\.com|golang\.org/x|google\.golang\.org|gopkg\.in|istio\.io|k8s\.io|sigs\.k8s\.io)(/.*)?$`),
 
 
 				// ----------- Java files -----------
 				// ----------- Java files -----------
 				// Gradle
 				// Gradle
@@ -75,12 +75,9 @@ func CreateGlobalConfig() config.Config {
 
 
 				// ----------- JavaScript files -----------
 				// ----------- JavaScript files -----------
 				// Dependencies and lock files.
 				// Dependencies and lock files.
-				regexp.MustCompile(`(^|/)node_modules/.*?$`),
-				regexp.MustCompile(`(^|/)package-lock\.json$`),
-				regexp.MustCompile(`(^|/)yarn\.lock$`),
-				regexp.MustCompile(`(^|/)pnpm-lock\.yaml$`),
-				regexp.MustCompile(`(^|/)npm-shrinkwrap\.json$`),
-				regexp.MustCompile(`(^|/)bower_components/.*?$`),
+				regexp.MustCompile(`(^|/)node_modules(/.*)?$`),
+				regexp.MustCompile(`(^|/)(npm-shrinkwrap\.json|package-lock\.json|pnpm-lock\.yaml|yarn\.lock)$`),
+				regexp.MustCompile(`(^|/)bower_components(/.*)?$`),
 				// TODO: Add more common static assets, such as swagger-ui.
 				// TODO: Add more common static assets, such as swagger-ui.
 				regexp.MustCompile(`(^|/)(angular|jquery(-?ui)?|plotly|swagger-?ui)[a-zA-Z0-9.-]*(\.min)?\.js(\.map)?$`),
 				regexp.MustCompile(`(^|/)(angular|jquery(-?ui)?|plotly|swagger-?ui)[a-zA-Z0-9.-]*(\.min)?\.js(\.map)?$`),
 
 
@@ -88,17 +85,17 @@ func CreateGlobalConfig() config.Config {
 				// Dependencies and lock files.
 				// Dependencies and lock files.
 				regexp.MustCompile(`(^|/)(Pipfile|poetry)\.lock$`),
 				regexp.MustCompile(`(^|/)(Pipfile|poetry)\.lock$`),
 				// Virtual environments
 				// Virtual environments
-				regexp.MustCompile(`(?i)/?(v?env|virtualenv)/lib(64)?/.+$`),
-				regexp.MustCompile(`(?i)(^|/)(lib(64)?/python[23](\.\d{1,2})+/|python/[23](\.\d{1,2})+/lib(64)?/).+$`),
+				regexp.MustCompile(`(?i)/?(v?env|virtualenv)/lib(64)?(/.*)?$`),
+				regexp.MustCompile(`(?i)(^|/)(lib(64)?/python[23](\.\d{1,2})+|python/[23](\.\d{1,2})+/lib(64)?)(/.*)?$`),
 				// dist-info directory (https://py-pkgs.org/04-package-structure.html#building-sdists-and-wheels)
 				// dist-info directory (https://py-pkgs.org/04-package-structure.html#building-sdists-and-wheels)
-				regexp.MustCompile(`(?i)(^|/)[a-z0-9_.]+-[0-9.]+\.dist-info/.+$`),
+				regexp.MustCompile(`(?i)(^|/)[a-z0-9_.]+-[0-9.]+\.dist-info(/.+)?$`),
 
 
 				// ----------- Ruby files -----------
 				// ----------- Ruby files -----------
-				regexp.MustCompile(`(^|/)vendor/(bundle|ruby)/.*?$`),
+				regexp.MustCompile(`(^|/)vendor/(bundle|ruby)(/.*?)?$`),
 				regexp.MustCompile(`\.gem$`), // tar archive
 				regexp.MustCompile(`\.gem$`), // tar archive
 
 
 				// Misc
 				// Misc
-				regexp.MustCompile(`verification-metadata.xml`),
+				regexp.MustCompile(`verification-metadata\.xml`),
 				regexp.MustCompile(`Database.refactorlog`),
 				regexp.MustCompile(`Database.refactorlog`),
 				//regexp.MustCompile(`vendor`),
 				//regexp.MustCompile(`vendor`),
 			},
 			},

+ 9 - 12
config/gitleaks.toml

@@ -32,25 +32,22 @@ paths = [
     '''(.*?)(doc|docx|zip|xls|pdf|bin|socket|vsidx|v2|suo|wsuo|.dll|pdb|exe|gltf)$''',
     '''(.*?)(doc|docx|zip|xls|pdf|bin|socket|vsidx|v2|suo|wsuo|.dll|pdb|exe|gltf)$''',
     '''go\.(mod|sum|work(\.sum)?)$''',
     '''go\.(mod|sum|work(\.sum)?)$''',
     '''(^|/)vendor/modules\.txt$''',
     '''(^|/)vendor/modules\.txt$''',
-    '''(^|/)vendor/(github\.com|golang\.org/x|google\.golang\.org|gopkg\.in|istio\.io|k8s\.io|sigs\.k8s\.io)/.*$''',
+    '''(^|/)vendor/(github\.com|golang\.org/x|google\.golang\.org|gopkg\.in|istio\.io|k8s\.io|sigs\.k8s\.io)(/.*)?$''',
     '''(^|/)gradlew(\.bat)?$''',
     '''(^|/)gradlew(\.bat)?$''',
     '''(^|/)gradle\.lockfile$''',
     '''(^|/)gradle\.lockfile$''',
     '''(^|/)mvnw(\.cmd)?$''',
     '''(^|/)mvnw(\.cmd)?$''',
     '''(^|/)\.mvn/wrapper/MavenWrapperDownloader\.java$''',
     '''(^|/)\.mvn/wrapper/MavenWrapperDownloader\.java$''',
-    '''(^|/)node_modules/.*?$''',
-    '''(^|/)package-lock\.json$''',
-    '''(^|/)yarn\.lock$''',
-    '''(^|/)pnpm-lock\.yaml$''',
-    '''(^|/)npm-shrinkwrap\.json$''',
-    '''(^|/)bower_components/.*?$''',
+    '''(^|/)node_modules(/.*)?$''',
+    '''(^|/)(npm-shrinkwrap\.json|package-lock\.json|pnpm-lock\.yaml|yarn\.lock)$''',
+    '''(^|/)bower_components(/.*)?$''',
     '''(^|/)(angular|jquery(-?ui)?|plotly|swagger-?ui)[a-zA-Z0-9.-]*(\.min)?\.js(\.map)?$''',
     '''(^|/)(angular|jquery(-?ui)?|plotly|swagger-?ui)[a-zA-Z0-9.-]*(\.min)?\.js(\.map)?$''',
     '''(^|/)(Pipfile|poetry)\.lock$''',
     '''(^|/)(Pipfile|poetry)\.lock$''',
-    '''(?i)/?(v?env|virtualenv)/lib(64)?/.+$''',
-    '''(?i)(^|/)(lib(64)?/python[23](\.\d{1,2})+/|python/[23](\.\d{1,2})+/lib(64)?/).+$''',
-    '''(?i)(^|/)[a-z0-9_.]+-[0-9.]+\.dist-info/.+$''',
-    '''(^|/)vendor/(bundle|ruby)/.*?$''',
+    '''(?i)/?(v?env|virtualenv)/lib(64)?(/.*)?$''',
+    '''(?i)(^|/)(lib(64)?/python[23](\.\d{1,2})+|python/[23](\.\d{1,2})+/lib(64)?)(/.*)?$''',
+    '''(?i)(^|/)[a-z0-9_.]+-[0-9.]+\.dist-info(/.+)?$''',
+    '''(^|/)vendor/(bundle|ruby)(/.*?)?$''',
     '''\.gem$''',
     '''\.gem$''',
-    '''verification-metadata.xml''',
+    '''verification-metadata\.xml''',
     '''Database.refactorlog''',
     '''Database.refactorlog''',
 ]
 ]
 stopwords = [
 stopwords = [

+ 2 - 2
detect/detect_test.go

@@ -789,7 +789,7 @@ func TestFromFiles(t *testing.T) {
 		err = detector.AddGitleaksIgnore(ignorePath)
 		err = detector.AddGitleaksIgnore(ignorePath)
 		require.NoError(t, err)
 		require.NoError(t, err)
 		detector.FollowSymlinks = true
 		detector.FollowSymlinks = true
-		paths, err := sources.DirectoryTargets(tt.source, detector.Sema, true)
+		paths, err := sources.DirectoryTargets(tt.source, detector.Sema, true, cfg.Allowlist.PathAllowed)
 		require.NoError(t, err)
 		require.NoError(t, err)
 		findings, err := detector.DetectFiles(paths)
 		findings, err := detector.DetectFiles(paths)
 		require.NoError(t, err)
 		require.NoError(t, err)
@@ -840,7 +840,7 @@ func TestDetectWithSymlinks(t *testing.T) {
 		cfg, _ := vc.Translate()
 		cfg, _ := vc.Translate()
 		detector := NewDetector(cfg)
 		detector := NewDetector(cfg)
 		detector.FollowSymlinks = true
 		detector.FollowSymlinks = true
-		paths, err := sources.DirectoryTargets(tt.source, detector.Sema, true)
+		paths, err := sources.DirectoryTargets(tt.source, detector.Sema, true, cfg.Allowlist.PathAllowed)
 		require.NoError(t, err)
 		require.NoError(t, err)
 		findings, err := detector.DetectFiles(paths)
 		findings, err := detector.DetectFiles(paths)
 		require.NoError(t, err)
 		require.NoError(t, err)

+ 5 - 2
detect/directory.go

@@ -16,6 +16,7 @@ func (d *Detector) DetectFiles(paths <-chan sources.ScanTarget) ([]report.Findin
 		d.Sema.Go(func() error {
 		d.Sema.Go(func() error {
 			logger := log.With().Str("path", pa.Path).Logger()
 			logger := log.With().Str("path", pa.Path).Logger()
 			logger.Trace().Msg("Scanning path")
 			logger.Trace().Msg("Scanning path")
+
 			f, err := os.Open(pa.Path)
 			f, err := os.Open(pa.Path)
 			if err != nil {
 			if err != nil {
 				if os.IsPermission(err) {
 				if os.IsPermission(err) {
@@ -24,7 +25,9 @@ func (d *Detector) DetectFiles(paths <-chan sources.ScanTarget) ([]report.Findin
 				}
 				}
 				return err
 				return err
 			}
 			}
-			defer f.Close()
+			defer func() {
+				_ = f.Close()
+			}()
 
 
 			// Get file size
 			// Get file size
 			fileInfo, err := f.Stat()
 			fileInfo, err := f.Stat()
@@ -37,7 +40,7 @@ func (d *Detector) DetectFiles(paths <-chan sources.ScanTarget) ([]report.Findin
 				if rawLength > int64(d.MaxTargetMegaBytes) {
 				if rawLength > int64(d.MaxTargetMegaBytes) {
 					logger.Debug().
 					logger.Debug().
 						Int64("size", rawLength).
 						Int64("size", rawLength).
-						Msgf("Skipping file: exceeds --max-target-megabytes")
+						Msg("Skipping file: exceeds --max-target-megabytes")
 					return nil
 					return nil
 				}
 				}
 			}
 			}

+ 37 - 15
sources/directory.go

@@ -1,12 +1,12 @@
 package sources
 package sources
 
 
 import (
 import (
-	"github.com/rs/zerolog/log"
 	"io/fs"
 	"io/fs"
 	"os"
 	"os"
 	"path/filepath"
 	"path/filepath"
 
 
 	"github.com/fatih/semgroup"
 	"github.com/fatih/semgroup"
+	"github.com/rs/zerolog/log"
 )
 )
 
 
 type ScanTarget struct {
 type ScanTarget struct {
@@ -14,13 +14,14 @@ type ScanTarget struct {
 	Symlink string
 	Symlink string
 }
 }
 
 
-func DirectoryTargets(source string, s *semgroup.Group, followSymlinks bool) (<-chan ScanTarget, error) {
+func DirectoryTargets(source string, s *semgroup.Group, followSymlinks bool, shouldSkip func(string) bool) (<-chan ScanTarget, error) {
 	paths := make(chan ScanTarget)
 	paths := make(chan ScanTarget)
 	s.Go(func() error {
 	s.Go(func() error {
 		defer close(paths)
 		defer close(paths)
 		return filepath.Walk(source,
 		return filepath.Walk(source,
 			func(path string, fInfo os.FileInfo, err error) error {
 			func(path string, fInfo os.FileInfo, err error) error {
 				logger := log.With().Str("path", path).Logger()
 				logger := log.With().Str("path", path).Logger()
+
 				if err != nil {
 				if err != nil {
 					if os.IsPermission(err) {
 					if os.IsPermission(err) {
 						// This seems to only fail on directories at this stage.
 						// This seems to only fail on directories at this stage.
@@ -30,21 +31,18 @@ func DirectoryTargets(source string, s *semgroup.Group, followSymlinks bool) (<-
 					return err
 					return err
 				}
 				}
 
 
-				if fInfo.Name() == ".git" && fInfo.IsDir() {
-					return filepath.SkipDir
-				}
+				// Empty; nothing to do here.
 				if fInfo.Size() == 0 {
 				if fInfo.Size() == 0 {
 					return nil
 					return nil
 				}
 				}
-				if fInfo.Mode().IsRegular() {
-					paths <- ScanTarget{
-						Path:    path,
-						Symlink: "",
-					}
+
+				// Unwrap symlinks, if |followSymlinks| is set.
+				scanTarget := ScanTarget{
+					Path: path,
 				}
 				}
 				if fInfo.Mode().Type() == fs.ModeSymlink {
 				if fInfo.Mode().Type() == fs.ModeSymlink {
 					if !followSymlinks {
 					if !followSymlinks {
-						log.Debug().Str("path", path).Msg("Skipping symlink")
+						logger.Debug().Msg("Skipping symlink")
 						return nil
 						return nil
 					}
 					}
 
 
@@ -52,15 +50,39 @@ func DirectoryTargets(source string, s *semgroup.Group, followSymlinks bool) (<-
 					if err != nil {
 					if err != nil {
 						return err
 						return err
 					}
 					}
+
 					realPathFileInfo, _ := os.Stat(realPath)
 					realPathFileInfo, _ := os.Stat(realPath)
 					if realPathFileInfo.IsDir() {
 					if realPathFileInfo.IsDir() {
-						log.Debug().Msgf("found symlinked directory: %s -> %s [skipping]", path, realPath)
+						logger.Warn().Str("target", realPath).Msg("Skipping symlinked directory")
 						return nil
 						return nil
 					}
 					}
-					paths <- ScanTarget{
-						Path:    realPath,
-						Symlink: path,
+
+					scanTarget.Path = realPath
+					scanTarget.Symlink = path
+				}
+
+				// TODO: Also run this check against the resolved symlink?
+				skip := shouldSkip(path)
+				if fInfo.IsDir() {
+					// Directory
+					if skip {
+						logger.Debug().Msg("Skipping directory due to global allowlist")
+						return filepath.SkipDir
+					}
+
+					if fInfo.Name() == ".git" {
+						// Don't scan .git directories.
+						// TODO: Add this to the config allowlist, instead of hard-coding it.
+						return filepath.SkipDir
+					}
+				} else {
+					// File
+					if skip {
+						logger.Debug().Msg("Skipping file due to global allowlist")
+						return nil
 					}
 					}
+
+					paths <- scanTarget
 				}
 				}
 				return nil
 				return nil
 			})
 			})