Jelajahi Sumber

add --max-target-megabytes : maximum size for a file/blob to be scanned (#1003)

* add --max-target-megabytes : maximum size for a file/blob to be scanned

* add --max-target-megabytes : maximum size for a file/blob to be scanned

* add --max-target-megabytes : maximum size for a file/blob to be scanned

* adjust readme comment

* fix on prem
roma8389 3 tahun lalu
induk
melakukan
c0caab0355
4 mengubah file dengan 24 tambahan dan 3 penghapusan
  1. 8 3
      README.md
  2. 3 0
      cmd/detect.go
  3. 1 0
      cmd/root.go
  4. 12 0
      detect/detect.go

+ 8 - 3
README.md

@@ -160,6 +160,7 @@ Flags:
   -b, --baseline-path          path to a previously generated report with known issues that gitleaks should ignore
   -s, --source string          path to source (git repo, directory, file)
   -v, --verbose                show verbose output from scan
+      --max-target-megabytes int   files larger than this will be skipped
 
 Use "gitleaks [command] --help" for more information about a command.
 ```
@@ -194,8 +195,8 @@ as a pre-commit.
 
 ### Creating a baseline
 
-When scanning large repositories or repositories with a long history, it can be convenient to use a baseline. When using a baseline, 
-gitleaks will ignore any old findings that are present in the baseline. A baseline can be any gitleaks report. To create a gitleaks report, run gitleaks with the `--report-path` parameter. 
+When scanning large repositories or repositories with a long history, it can be convenient to use a baseline. When using a baseline,
+gitleaks will ignore any old findings that are present in the baseline. A baseline can be any gitleaks report. To create a gitleaks report, run gitleaks with the `--report-path` parameter.
 
 ```
 gitleaks detect --report-path gitleaks-report.json # This will save the report in a file called gitleaks-report.json
@@ -368,12 +369,16 @@ stopwords = [
   '''endpoint''',
 ]
 ```
+
 Refer to the default [gitleaks config](https://github.com/zricethezav/gitleaks/blob/master/config/gitleaks.toml) for examples or follow the [contributing guidelines](https://github.com/zricethezav/gitleaks/blob/master/README.md).
 
 ### Additional Configuration
+
 #### gitleaks:allow
+
 If you are knowingly committing a test secret that gitleaks will catch you can add a `gitleaks:allow` comment to that line which will instruct gitleaks
 to ignore that secret. Ex:
+
 ```
 class CustomClass:
     discord_client_secret = '8dyfuiRyq=vVc3RRr_edRk-fK__JItpZ'  #gitleaks:allow
@@ -381,8 +386,8 @@ class CustomClass:
 ```
 
 #### .gitleaksignore
-You can ignore specific findings by creating a `.gitleaksignore` file at the root of your repo. In release v8.10.0 Gitleaks added a `Fingerprint` value to the Gitleaks report. Each leak, or finding, has a Fingerprint that uniquely identifies a secret. Add this fingerprint to the `.gitleaksignore` file to ignore that specific secret. See Gitleaks' [.gitleaksignore](https://github.com/zricethezav/gitleaks/blob/master/.gitleaksignore) for an example. Note: this feature is expirmental and is subject to change in the future.
 
+You can ignore specific findings by creating a `.gitleaksignore` file at the root of your repo. In release v8.10.0 Gitleaks added a `Fingerprint` value to the Gitleaks report. Each leak, or finding, has a Fingerprint that uniquely identifies a secret. Add this fingerprint to the `.gitleaksignore` file to ignore that specific secret. See Gitleaks' [.gitleaksignore](https://github.com/zricethezav/gitleaks/blob/master/.gitleaksignore) for an example. Note: this feature is experimental and is subject to change in the future.
 
 ## Secured by Jit
 

+ 3 - 0
cmd/detect.go

@@ -70,6 +70,9 @@ func runDetect(cmd *cobra.Command, args []string) {
 	if detector.Redact, err = cmd.Flags().GetBool("redact"); err != nil {
 		log.Fatal().Err(err).Msg("")
 	}
+	if detector.MaxTargetMegaBytes, err = cmd.Flags().GetInt("max-target-megabytes"); err != nil {
+		log.Fatal().Err(err).Msg("")
+	}
 
 	if fileExists(filepath.Join(source, ".gitleaksignore")) {
 		if err = detector.AddGitleaksIgnore(filepath.Join(source, ".gitleaksignore")); err != nil {

+ 1 - 0
cmd/root.go

@@ -45,6 +45,7 @@ func init() {
 	rootCmd.PersistentFlags().StringP("baseline-path", "b", "", "path to baseline with issues that can be ignored")
 	rootCmd.PersistentFlags().StringP("log-level", "l", "info", "log level (trace, debug, info, warn, error, fatal)")
 	rootCmd.PersistentFlags().BoolP("verbose", "v", false, "show verbose output from scan")
+	rootCmd.PersistentFlags().Int("max-target-megabytes", 0, "files larger than this will be skipped")
 	rootCmd.PersistentFlags().Bool("redact", false, "redact secrets from logs and stdout")
 	rootCmd.PersistentFlags().Bool("no-banner", false, "suppress banner")
 	err := viper.BindPFlag("config", rootCmd.PersistentFlags().Lookup("config"))

+ 12 - 0
detect/detect.go

@@ -49,6 +49,9 @@ type Detector struct {
 	// verbose is a flag to print findings
 	Verbose bool
 
+	// MaxTargetMegaBytes is the size limit in megabytes; larger files are skipped (0 disables the limit)
+	MaxTargetMegaBytes int
+
 	// commitMap is used to keep track of commits that have been scanned.
 	// This is only used for logging purposes and git scans.
 	commitMap map[string]bool
@@ -211,6 +214,15 @@ func (d *Detector) detectRule(fragment Fragment, rule config.Rule) []report.Find
 		return findings
 	}
 
+	// If the size limit is configured and the raw data exceeds it, skip this fragment
+	if d.MaxTargetMegaBytes > 0 {
+		rawLength := len(fragment.Raw) / 1000000
+		if rawLength > d.MaxTargetMegaBytes {
+			log.Debug().Msgf("skipping file: %s scan due to size: %d", fragment.FilePath, rawLength)
+			return findings
+		}
+	}
+
 	matchIndices := rule.Regex.FindAllStringIndex(fragment.Raw, -1)
 	for _, matchIndex := range matchIndices {
 		// extract secret from match