瀏覽代碼

update to V7 (#469)

Zachary Rice 5 年之前
父節點
當前提交
c50906373c
共有 88 個文件被更改,包括 2608 次插入2673 次删除
  1. 1 1
      Dockerfile
  2. 2 5
      Makefile
  3. 69 6
      README.md
  4. 67 4
      config/config.go
  5. 16 16
      config/config_test.go
  6. 26 1
      config/default.go
  7. 1 1
      examples/leaky-repo.toml
  8. 1 1
      examples/simple_regex_and_allowlist_config.toml
  9. 1 1
      examples/simple_regex_config.toml
  10. 6 8
      go.mod
  11. 17 34
      go.sum
  12. 0 190
      hosts/github.go
  13. 0 112
      hosts/gitlab.go
  14. 0 53
      hosts/host.go
  15. 0 116
      hosts/hosts_test.go
  16. 33 58
      main.go
  17. 0 274
      manager/manager.go
  18. 0 106
      manager/manager_test.go
  19. 0 78
      manager/report.go
  20. 74 119
      options/options.go
  21. 33 0
      report/leak.go
  22. 95 0
      report/report.go
  23. 15 13
      report/sarif.go
  24. 86 0
      scan/commit.go
  25. 44 0
      scan/commits.go
  26. 60 0
      scan/filesatcommit.go
  27. 103 0
      scan/nogit.go
  28. 69 0
      scan/parent.go
  29. 120 261
      scan/repo.go
  30. 0 400
      scan/rule.go
  31. 114 439
      scan/scan.go
  32. 151 118
      scan/scan_test.go
  33. 190 0
      scan/unstaged.go
  34. 510 0
      scan/utils.go
  35. 4 2
      test_data/test_configs/allowlist_commit.toml
  36. 1 1
      test_data/test_configs/aws_key.toml
  37. 1 1
      test_data/test_configs/aws_key_allowlist_files.toml
  38. 1 1
      test_data/test_configs/aws_key_allowlist_python_files.toml
  39. 1 1
      test_data/test_configs/aws_key_aws_allowlisted.toml
  40. 1 1
      test_data/test_configs/aws_key_file_regex.toml
  41. 1 1
      test_data/test_configs/aws_key_global_allowlist_file.toml
  42. 1 1
      test_data/test_configs/aws_key_global_allowlist_path.toml
  43. 1 1
      test_data/test_configs/aws_key_local_owner_allowlist_repo.toml
  44. 1 1
      test_data/test_configs/aws_key_with_report_groups.toml
  45. 1 1
      test_data/test_configs/bad_aws_key.toml
  46. 1 1
      test_data/test_configs/bad_aws_key_file_regex.toml
  47. 1 1
      test_data/test_configs/bad_aws_key_global_allowlist_file.toml
  48. 1 1
      test_data/test_configs/bad_regex_aws_key.toml
  49. 16 0
      test_data/test_configs/bug_453.toml
  50. 1 1
      test_data/test_configs/large.toml
  51. 1 1
      test_data/test_configs/large_with_global_allowlist_regex.toml
  52. 1 1
      test_data/test_configs/regex_filepath.toml
  53. 30 0
      test_data/test_dir1_aws_leak.json
  54. 16 0
      test_data/test_file1_aws_leak.json
  55. 217 44
      test_data/test_local_owner_aws_leak.json
  56. 213 56
      test_data/test_local_owner_aws_leak_allowlist_repo.json
  57. 31 24
      test_data/test_local_owner_aws_leak_depth_2.json
  58. 3 3
      test_data/test_local_repo_eight.json
  59. 2 2
      test_data/test_local_repo_five_files_at_commit.json
  60. 2 2
      test_data/test_local_repo_five_files_at_latest_commit.json
  61. 8 6
      test_data/test_local_repo_four_alt_config_entropy.json
  62. 10 8
      test_data/test_local_repo_four_leaks_commit_timerange.json
  63. 1 1
      test_data/test_local_repo_nine_aws_leak.json
  64. 1 1
      test_data/test_local_repo_one_aws_leak.json
  65. 1 1
      test_data/test_local_repo_one_aws_leak_and_file_leak.json
  66. 1 1
      test_data/test_local_repo_one_aws_leak_commit.json
  67. 5 4
      test_data/test_local_repo_one_aws_leak_uncommitted.json
  68. 2 2
      test_data/test_local_repo_seven_aws_leak_uncommitted.json
  69. 1 1
      test_data/test_local_repo_six.json
  70. 1 1
      test_data/test_local_repo_six_filepath.json
  71. 1 1
      test_data/test_local_repo_six_leaks_since_date.json
  72. 1 1
      test_data/test_local_repo_six_leaks_until_date.json
  73. 1 1
      test_data/test_local_repo_six_path_globally_allowlisted.json
  74. 19 15
      test_data/test_local_repo_three_leaks.json
  75. 14 11
      test_data/test_local_repo_three_leaks_with_report_groups.json
  76. 5 4
      test_data/test_local_repo_two_allowlist_commits.json
  77. 14 11
      test_data/test_local_repo_two_leaks.json
  78. 9 7
      test_data/test_local_repo_two_leaks_commit_from.json
  79. 9 7
      test_data/test_local_repo_two_leaks_commit_range.json
  80. 5 4
      test_data/test_local_repo_two_leaks_commit_to.json
  81. 5 4
      test_data/test_local_repo_two_leaks_commit_to_from.json
  82. 6 6
      test_data/test_local_repo_two_leaks_deletion.json
  83. 9 7
      test_data/test_local_repo_two_leaks_file_commit_range.json
  84. 1 0
      test_data/test_regex_allowlist.json.got
  85. 1 0
      test_data/test_repos/no_repo/tmp.tmp
  86. 9 0
      test_data/test_repos/test_dir_1/server.test.py
  87. 9 0
      test_data/test_repos/test_dir_1/server.test2.py
  88. 5 4
      test_data/test_repos/test_repo_4/gitleaks.toml

+ 1 - 1
Dockerfile

@@ -1,4 +1,4 @@
-FROM golang:1.14.1 AS build
+FROM golang:1.15.5 AS build
 WORKDIR /go/src/github.com/zricethezav/gitleaks
 ARG ldflags
 COPY . .

+ 2 - 5
Makefile

@@ -2,8 +2,8 @@
 
 VERSION := `git fetch --tags && git tag | sort -V | tail -1`
 PKG=github.com/zricethezav/gitleaks
-LDFLAGS=-ldflags "-X=github.com/zricethezav/gitleaks/v6/version.Version=$(VERSION)"
-_LDFLAGS="github.com/zricethezav/gitleaks/v6/version.Version=$(VERSION)"
+LDFLAGS=-ldflags "-X=github.com/zricethezav/gitleaks/v7/version.Version=$(VERSION)"
+_LDFLAGS="github.com/zricethezav/gitleaks/v7/version.Version=$(VERSION)"
 COVER=--cover --coverprofile=cover.out
 
 test-cover:
@@ -17,9 +17,6 @@ test:
 	golint ./...
 	go test ./... --race $(PKG) -v
 
-test-integration:
-	go test github.com/zricethezav/gitleaks/hosts -v -integration
-
 build:
 	go fmt ./...
 	golint ./...

+ 69 - 6
README.md

@@ -8,18 +8,79 @@
 Gitleaks is a SAST tool for detecting hardcoded secrets like passwords, api keys, and tokens in git repos. Gitleaks aims to be the **easy-to-use, all-in-one solution** for finding secrets, past or present, in your code. 
  
 ### Features:
-- Scans for [commited](https://github.com/zricethezav/gitleaks/wiki/Scanning) secrets
-- Scans for [uncommitted](https://github.com/zricethezav/gitleaks/wiki/Scanning#uncommitted-changes-scan) secrets as part of shifting security left
+- Scan for [committed](https://github.com/zricethezav/gitleaks/wiki/Scanning) secrets
+- Scan for [uncommitted](https://github.com/zricethezav/gitleaks/wiki/Scanning#uncommitted-changes-scan) secrets as part of shifting security left
+- Scan entire directories and files
 - Available [Github Action](https://github.com/marketplace/actions/gitleaks)
-- Gitlab and Github API support which allows scans of whole organizations, users, and pull/merge requests
 - [Custom rules](https://github.com/zricethezav/gitleaks/wiki/Configuration) via toml configuration
 - High performance using [go-git](https://github.com/go-git/go-git)
-- JSON and CSV reporting
+- JSON, SARIF, and CSV reporting
 - Private repo scans using key or password based authentication
 
 
-## Installation, Documentation and Examples
-This project is documented [here](https://github.com/zricethezav/gitleaks/wiki)
+### Installation
+Written in Go, gitleaks is available in binary form for many popular platforms and OS types from the [releases page](https://github.com/zricethezav/gitleaks/releases). Alternatively, it can be run via Docker or installed directly using Go.
+
+##### MacOS
+
+```
+brew install gitleaks
+```
+
+##### Docker
+
+```bash
+docker pull zricethezav/gitleaks
+```
+
+##### Go
+```bash
+GO111MODULE=on go get github.com/zricethezav/gitleaks/v7
+```
+
+### Usage
+```
+Usage:
+  gitleaks [OPTIONS]
+
+Application Options:
+  -v, --verbose           Show verbose output from scan
+  -r, --repo-url=         Repository URL
+  -p, --path=             Path to directory (repo if contains .git) or file
+  -c, --config-path=      Path to config
+      --repo-config-path= Path to gitleaks config relative to repo root
+      --clone-path=       Path to clone repo to disk
+      --clone-cleanup=    Deletes cloned repo after scan
+      --version           Version number
+      --username=         Username for git repo
+      --password=         Password for git repo
+      --access-token=     Access token for git repo
+      --threads=          Maximum number of threads gitleaks spawns
+      --ssh-key=          Path to ssh key used for auth
+      --unstaged          Run gitleaks on unstaged code
+      --branch=           Branch to scan
+      --redact            Redact secrets from log messages and leaks
+      --debug             Log debug messages
+      --no-git            Treat git repos as plain directories and scan those
+                          files
+  -o, --report=           Report output path
+  -f, --format=           JSON, CSV, SARIF (default: json)
+      --files-at-commit=  Sha of commit to scan all files at commit
+      --commit=           Sha of commit to scan or "latest" to scan the last
+                          commit of the repository
+      --commits=          Comma separated list of a commits to scan
+      --commits-file=     Path to file of line separated list of commits to scan
+      --commit-from=      Commit to start scan from
+      --commit-to=        Commit to stop scan
+      --commit-since=     Scan commits more recent than a specific date. Ex:
+                          '2006-01-02' or '2006-01-02T15:04:05-0700' format.
+      --commit-until=     Scan commits older than a specific date. Ex:
+                          '2006-01-02' or '2006-01-02T15:04:05-0700' format.
+      --depth=            Number of commits to scan
+
+Help Options:
+  -h, --help              Show this help message
+```
 
 
 ###  Sponsors ❤️
@@ -34,6 +95,8 @@ These users are [sponsors](https://github.com/sponsors/zricethezav) of gitleaks:
 [![Adam Shannon](https://github.com/adamdecaf.png?size=50)](https://github.com/adamdecaf) | 
 ---|
 ----
+
+
 #### Logo Attribution
 The Gitleaks logo uses the Git Logo created by <a href="https://twitter.com/jasonlong">Jason Long</a>, which is licensed under the <a href="https://creativecommons.org/licenses/by/3.0/">Creative Commons Attribution 3.0 Unported License</a>.
 

+ 67 - 4
config/config.go

@@ -6,9 +6,10 @@ import (
 	"regexp"
 	"strconv"
 
-	"github.com/zricethezav/gitleaks/v6/options"
+	"github.com/zricethezav/gitleaks/v7/options"
 
 	"github.com/BurntSushi/toml"
+	"github.com/go-git/go-git/v5"
 	log "github.com/sirupsen/logrus"
 )
 
@@ -94,10 +95,10 @@ func NewConfig(options options.Options) (Config, error) {
 	tomlLoader := TomlLoader{}
 
 	var err error
-	if options.Config != "" {
-		_, err = toml.DecodeFile(options.Config, &tomlLoader)
+	if options.ConfigPath != "" {
+		_, err = toml.DecodeFile(options.ConfigPath, &tomlLoader)
 		// append an allowlist rule for allowlisting the config
-		tomlLoader.AllowList.Files = append(tomlLoader.AllowList.Files, path.Base(options.Config))
+		tomlLoader.AllowList.Files = append(tomlLoader.AllowList.Files, path.Base(options.ConfigPath))
 	} else {
 		_, err = toml.Decode(DefaultConfig, &tomlLoader)
 	}
@@ -139,6 +140,8 @@ func (tomlLoader TomlLoader) Parse() (Config, error) {
 		// rule specific allowlists
 		var allowList AllowList
 
+		allowList.Description = rule.AllowList.Description
+
 		// rule specific regexes
 		for _, re := range rule.AllowList.Regexes {
 			allowListedRegex, err := regexp.Compile(re)
@@ -166,6 +169,9 @@ func (tomlLoader TomlLoader) Parse() (Config, error) {
 			allowList.Paths = append(allowList.Paths, allowListedRegex)
 		}
 
+		// rule specific commits
+		allowList.Commits = rule.AllowList.Commits
+
 		var entropies []Entropy
 		for _, e := range rule.Entropies {
 			min, err := strconv.ParseFloat(e.Min, 64)
@@ -250,3 +256,60 @@ func (tomlLoader TomlLoader) Parse() (Config, error) {
 
 	return cfg, nil
 }
+
+// LoadRepoConfig accepts a repo and a config path relative to the target repo's root.
+func LoadRepoConfig(repo *git.Repository, repoConfig string) (Config, error) {
+	gitRepoConfig, err := repo.Config()
+	if err != nil {
+		return Config{}, err
+	}
+	if !gitRepoConfig.Core.IsBare {
+		wt, err := repo.Worktree()
+		if err != nil {
+			return Config{}, err
+		}
+		_, err = wt.Filesystem.Stat(repoConfig)
+		if err != nil {
+			return Config{}, err
+		}
+		r, err := wt.Filesystem.Open(repoConfig)
+		if err != nil {
+			return Config{}, err
+		}
+		var tomlLoader TomlLoader
+		_, err = toml.DecodeReader(r, &tomlLoader)
+		if err != nil {
+			return Config{}, err
+		}
+
+		return tomlLoader.Parse()
+	}
+
+	log.Debug("attempting to load repo config from bare worktree, this may use an old config")
+	ref, err := repo.Head()
+	if err != nil {
+		return Config{}, err
+	}
+
+	c, err := repo.CommitObject(ref.Hash())
+	if err != nil {
+		return Config{}, err
+	}
+
+	f, err := c.File(repoConfig)
+	if err != nil {
+		return Config{}, err
+	}
+
+	var tomlLoader TomlLoader
+	r, err := f.Reader()
+	if err != nil {
+		return Config{}, err
+	}
+	_, err = toml.DecodeReader(r, &tomlLoader)
+	if err != nil {
+		return Config{}, err
+	}
+
+	return tomlLoader.Parse()
+}

+ 16 - 16
config/config_test.go

@@ -7,7 +7,7 @@ import (
 	"regexp"
 	"testing"
 
-	"github.com/zricethezav/gitleaks/v6/options"
+	"github.com/zricethezav/gitleaks/v7/options"
 )
 
 func TestParse(t *testing.T) {
@@ -26,95 +26,95 @@ func TestParse(t *testing.T) {
 		{
 			description: "test successful load",
 			opts: options.Options{
-				Config: "../test_data/test_configs/aws_key.toml",
+				ConfigPath: "../test_data/test_configs/aws_key.toml",
 			},
 		},
 		{
 			description: "test bad toml",
 			opts: options.Options{
-				Config: "../test_data/test_configs/bad_aws_key.toml",
+				ConfigPath: "../test_data/test_configs/bad_aws_key.toml",
 			},
 			wantErr: fmt.Errorf("Near line 7 (last key parsed 'rules.description'): expected value but found \"AWS\" instead"),
 		},
 		{
 			description: "test bad regex",
 			opts: options.Options{
-				Config: "../test_data/test_configs/bad_regex_aws_key.toml",
+				ConfigPath: "../test_data/test_configs/bad_regex_aws_key.toml",
 			},
 			wantErr: fmt.Errorf("problem loading config: error parsing regexp: invalid nested repetition operator: `???`"),
 		},
 		{
 			description: "test bad global allowlist file regex",
 			opts: options.Options{
-				Config: "../test_data/test_configs/bad_aws_key_global_allowlist_file.toml",
+				ConfigPath: "../test_data/test_configs/bad_aws_key_global_allowlist_file.toml",
 			},
 			wantErr: fmt.Errorf("problem loading config: error parsing regexp: missing argument to repetition operator: `??`"),
 		},
 		{
 			description: "test bad global file regex",
 			opts: options.Options{
-				Config: "../test_data/test_configs/bad_aws_key_file_regex.toml",
+				ConfigPath: "../test_data/test_configs/bad_aws_key_file_regex.toml",
 			},
 			wantErr: fmt.Errorf("problem loading config: error parsing regexp: missing argument to repetition operator: `??`"),
 		},
 		{
 			description: "test successful load big ol thing",
 			opts: options.Options{
-				Config: "../test_data/test_configs/large.toml",
+				ConfigPath: "../test_data/test_configs/large.toml",
 			},
 		},
 		{
 			description: "test load entropy",
 			opts: options.Options{
-				Config: "../test_data/test_configs/entropy.toml",
+				ConfigPath: "../test_data/test_configs/entropy.toml",
 			},
 		},
 		{
 			description: "test entropy bad range",
 			opts: options.Options{
-				Config: "../test_data/test_configs/bad_entropy_1.toml",
+				ConfigPath: "../test_data/test_configs/bad_entropy_1.toml",
 			},
 			wantErr: fmt.Errorf("problem loading config: entropy Min value cannot be higher than Max value"),
 		},
 		{
 			description: "test entropy value max",
 			opts: options.Options{
-				Config: "../test_data/test_configs/bad_entropy_2.toml",
+				ConfigPath: "../test_data/test_configs/bad_entropy_2.toml",
 			},
 			wantErr: fmt.Errorf("strconv.ParseFloat: parsing \"x\": invalid syntax"),
 		},
 		{
 			description: "test entropy value min",
 			opts: options.Options{
-				Config: "../test_data/test_configs/bad_entropy_3.toml",
+				ConfigPath: "../test_data/test_configs/bad_entropy_3.toml",
 			},
 			wantErr: fmt.Errorf("strconv.ParseFloat: parsing \"x\": invalid syntax"),
 		},
 		{
 			description: "test entropy value group",
 			opts: options.Options{
-				Config: "../test_data/test_configs/bad_entropy_4.toml",
+				ConfigPath: "../test_data/test_configs/bad_entropy_4.toml",
 			},
 			wantErr: fmt.Errorf("strconv.ParseInt: parsing \"x\": invalid syntax"),
 		},
 		{
 			description: "test entropy value group",
 			opts: options.Options{
-				Config: "../test_data/test_configs/bad_entropy_5.toml",
+				ConfigPath: "../test_data/test_configs/bad_entropy_5.toml",
 			},
 			wantErr: fmt.Errorf("problem loading config: group cannot be lower than 0"),
 		},
 		{
 			description: "test entropy value group",
 			opts: options.Options{
-				Config: "../test_data/test_configs/bad_entropy_6.toml",
+				ConfigPath: "../test_data/test_configs/bad_entropy_6.toml",
 			},
 			wantErr: fmt.Errorf("problem loading config: group cannot be higher than number of groups in regexp"),
 		},
 		{
 			description: "test entropy range limits",
 			opts: options.Options{
-				Config: "../test_data/test_configs/bad_entropy_7.toml",
+				ConfigPath: "../test_data/test_configs/bad_entropy_7.toml",
 			},
 			wantErr: fmt.Errorf("problem loading config: invalid entropy ranges, must be within 0.0-8.0"),
 		},
@@ -151,7 +151,7 @@ func TestParseFields(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	config, err := NewConfig(options.Options{Config: configPath})
+	config, err := NewConfig(options.Options{ConfigPath: configPath})
 	if err != nil {
 		t.Fatalf("Couldn't parse config: %v", err)
 	}

+ 26 - 1
config/default.go

@@ -7,7 +7,7 @@ const DefaultConfig = `
 title = "gitleaks config"
 
 [[rules]]
-	description = "AWS Manager ID"
+	description = "AWS Access Key"
 	regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
 	tags = ["key", "AWS"]
 
@@ -131,6 +131,31 @@ title = "gitleaks config"
 	regex = '''(?i)twilio(.{0,20})?SK[0-9a-f]{32}'''
 	tags = ["key", "twilio"]
 
+[[rules]]
+	description = "Dynatrace ttoken"
+	regex = '''dt0[a-zA-Z]{1}[0-9]{2}\.[A-Z0-9]{24}\.[A-Z0-9]{64}'''
+	tags = ["key", "Dynatrace"]
+
+[[rules]]
+	description = "Shopify shared secret"
+	regex = '''shpss_[a-fA-F0-9]{32}'''
+	tags = ["key", "Shopify"]
+
+[[rules]]
+	description = "Shopify access token"
+	regex = '''shpat_[a-fA-F0-9]{32}'''
+	tags = ["key", "Shopify"]
+
+[[rules]]
+	description = "Shopify custom app access token"
+	regex = '''shpca_[a-fA-F0-9]{32}'''
+	tags = ["key", "Shopify"]
+
+[[rules]]
+	description = "Shopify private app access token"
+	regex = '''shppa_[a-fA-F0-9]{32}'''
+	tags = ["key", "Shopify"]
+
 [allowlist]
 	description = "Allowlisted files"
 	files = ['''^\.?gitleaks.toml$''',

+ 1 - 1
examples/leaky-repo.toml

@@ -1,7 +1,7 @@
 title = "gitleaks config"
 
 [[rules]]
-	description = "AWS Manager ID"
+	description = "AWS Access Key"
 	regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
 	tags = ["key", "AWS"]
 

+ 1 - 1
examples/simple_regex_and_allowlist_config.toml

@@ -5,7 +5,7 @@
 # with the rule below, but since we have a allowlist against that specific key, it would be ignored.
 
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]
     [rules.allowlist]

+ 1 - 1
examples/simple_regex_config.toml

@@ -1,6 +1,6 @@
 # This is a simple gitleaks config that contains one rule which checks for AWS keys.
 
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]

+ 6 - 8
go.mod

@@ -1,18 +1,16 @@
-module github.com/zricethezav/gitleaks/v6
+module github.com/zricethezav/gitleaks/v7
 
-go 1.14
+go 1.15
 
 require (
 	github.com/BurntSushi/toml v0.3.1
-	github.com/go-git/go-billy/v5 v5.0.0
-	github.com/go-git/go-git/v5 v5.1.0
+	github.com/go-git/go-git/v5 v5.2.0
 	github.com/google/go-cmp v0.4.0 // indirect
-	github.com/google/go-github/v31 v31.0.0
 	github.com/hako/durafmt v0.0.0-20191009132224-3f39dc1ed9f4
 	github.com/jessevdk/go-flags v1.4.0
-	github.com/mattn/go-colorable v0.1.2
 	github.com/sergi/go-diff v1.1.0
 	github.com/sirupsen/logrus v1.4.2
-	github.com/xanzy/go-gitlab v0.21.0
-	golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45
+	golang.org/x/lint v0.0.0-20200302205851-738671d3881b // indirect
+	golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9
+	gopkg.in/yaml.v2 v2.2.8 // indirect
 )

+ 17 - 34
go.sum

@@ -1,4 +1,3 @@
-cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
 github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7 h1:uSoVVbwJiQipAclBbw+8quDsfcvFjOpI5iCf4p/cqCs=
@@ -21,22 +20,14 @@ github.com/go-git/gcfg v1.5.0 h1:Q5ViNfGF8zFgyJWPqYwA7qGFoMTEiBmdlkcfRmpIMa4=
 github.com/go-git/gcfg v1.5.0/go.mod h1:5m20vg6GwYabIxaOonVkTdrILxQMpEShl1xiMF4ua+E=
 github.com/go-git/go-billy/v5 v5.0.0 h1:7NQHvd9FVid8VL4qVUMm8XifBK+2xCoZ2lSk0agRrHM=
 github.com/go-git/go-billy/v5 v5.0.0/go.mod h1:pmpqyWchKfYfrkb/UVH4otLvyi/5gJlGI4Hb3ZqZ3W0=
-github.com/go-git/go-git-fixtures/v4 v4.0.1 h1:q+IFMfLx200Q3scvt2hN79JsEzy4AmBTp/pqnefH+Bc=
-github.com/go-git/go-git-fixtures/v4 v4.0.1/go.mod h1:m+ICp2rF3jDhFgEZ/8yziagdT1C+ZpZcrJjappBCDSw=
-github.com/go-git/go-git/v5 v5.1.0 h1:HxJn9g/E7eYvKW3Fm7Jt4ee8LXfPOm/H1cdDu8vEssk=
-github.com/go-git/go-git/v5 v5.1.0/go.mod h1:ZKfuPUoY1ZqIG4QG9BDBh3G4gLM5zvPuSJAozQrZuyM=
-github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
-github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
-github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
-github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/go-git/go-git-fixtures/v4 v4.0.2-0.20200613231340-f56387b50c12 h1:PbKy9zOy4aAKrJ5pibIRpVO2BXnK1Tlcg+caKI7Ox5M=
+github.com/go-git/go-git-fixtures/v4 v4.0.2-0.20200613231340-f56387b50c12/go.mod h1:m+ICp2rF3jDhFgEZ/8yziagdT1C+ZpZcrJjappBCDSw=
+github.com/go-git/go-git/v5 v5.2.0 h1:YPBLG/3UK1we1ohRkncLjaXWLW+HKp5QNM/jTli2JgI=
+github.com/go-git/go-git/v5 v5.2.0/go.mod h1:kh02eMX+wdqqxgNMEyq8YgwlIOsDOa9homkUq1PoTMs=
 github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY=
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
 github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-github/v31 v31.0.0 h1:JJUxlP9lFK+ziXKimTCprajMApV1ecWD4NB6CCb0plo=
-github.com/google/go-github/v31 v31.0.0/go.mod h1:NQPZol8/1sMoWYGN2yaALIBytu17gAWfhbweiEed3pM=
-github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk=
-github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
 github.com/hako/durafmt v0.0.0-20191009132224-3f39dc1ed9f4 h1:60gBOooTSmNtrqNaRvrDbi8VAne0REaek2agjnITKSw=
 github.com/hako/durafmt v0.0.0-20191009132224-3f39dc1ed9f4/go.mod h1:5Scbynm8dF1XAPwIwkGPqzkM/shndPm79Jd1003hTjE=
 github.com/imdario/mergo v0.3.9 h1:UauaLniWCFHWd+Jp9oCEkTBj8VO/9DKg3PV3VCNMDIg=
@@ -56,10 +47,6 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
-github.com/mattn/go-colorable v0.1.2 h1:/bC9yWikZXAL9uJdulbSfyVNIR3n3trXl+v8+1sx8mU=
-github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
-github.com/mattn/go-isatty v0.0.8 h1:HLtExJ+uU2HOZ+wI0Tt5DtUDrx8yhUqDcp7fYERX4CE=
-github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
 github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
 github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
 github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
@@ -77,30 +64,25 @@ github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
-github.com/xanzy/go-gitlab v0.21.0 h1:Ru55sR4TBoDNsAKwCOpzeaGtbiWj7xTksVmzBJbLu6c=
-github.com/xanzy/go-gitlab v0.21.0/go.mod h1:t4Bmvnxj7k37S4Y17lfLx+nLqkf/oQwT2HagfWKv5Og=
 github.com/xanzy/ssh-agent v0.2.1 h1:TCbipTQL2JiiCprBWx9frJ2eJlCYT00NmctrHxVAr70=
 github.com/xanzy/ssh-agent v0.2.1/go.mod h1:mLlQY/MoOhWBj+gOGMQkOeiEvkx+8pJSI+0Bx9h2kr4=
 golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073 h1:xMPOj6Pz6UipU1wXLkrtqpHbR0AVFnyPEQq/wRWz9lM=
 golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20181108082009-03003ca0c849/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
-golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/lint v0.0.0-20200302205851-738671d3881b h1:Wh+f8QHJXR411sJR8/vRBTZ7YapZaRvUcLFFJhusH0k=
+golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
+golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0=
 golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
-golang.org/x/oauth2 v0.0.0-20181106182150-f42d05182288/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
-golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 h1:SVwTIAaPC2U/AvvLNZ2a7OVsmBpC8L5BlwK1whH3hm0=
-golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
-golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 h1:SQFwaSi55rU7vdNs9Yr0Z324VNlrF+0wMqRXT4St8ck=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190221075227-b4e8571b14e0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527 h1:uYVVQ9WP/Ds2ROhcaGPeIdVq0RIXVLwsHlnvJ+cT1So=
@@ -109,12 +91,11 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
 golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7 h1:EBZoQjiKKPaLbPrbpssUfuHtwM6KV/vb4U85g/cigFY=
+golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
-google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
-google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508=
-google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
@@ -125,3 +106,5 @@ gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
 gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
+gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

+ 0 - 190
hosts/github.go

@@ -1,190 +0,0 @@
-package hosts
-
-import (
-	"context"
-	"strconv"
-	"strings"
-	"sync"
-
-	"github.com/zricethezav/gitleaks/v6/manager"
-	"github.com/zricethezav/gitleaks/v6/options"
-	"github.com/zricethezav/gitleaks/v6/scan"
-
-	"github.com/go-git/go-git/v5"
-	"github.com/go-git/go-git/v5/plumbing"
-	"github.com/go-git/go-git/v5/plumbing/object"
-	"github.com/go-git/go-git/v5/plumbing/transport"
-	"github.com/google/go-github/v31/github"
-	log "github.com/sirupsen/logrus"
-	"golang.org/x/oauth2"
-)
-
-// Github wraps a github client and manager. This struct implements what the Host interface defines.
-type Github struct {
-	client  *github.Client
-	manager *manager.Manager
-	wg      sync.WaitGroup
-}
-
-// NewGithubClient accepts a manager struct and returns a Github host pointer which will be used to
-// perform a github scan on an organization, user, or PR.
-func NewGithubClient(m *manager.Manager) (*Github, error) {
-	var err error
-	ctx := context.Background()
-	token := oauth2.StaticTokenSource(
-		&oauth2.Token{AccessToken: options.GetAccessToken(m.Opts)},
-	)
-
-	var githubClient *github.Client
-	httpClient := oauth2.NewClient(ctx, token)
-
-	if m.Opts.BaseURL == "" {
-		githubClient = github.NewClient(httpClient)
-	} else {
-		githubClient, err = github.NewEnterpriseClient(m.Opts.BaseURL, m.Opts.BaseURL, httpClient)
-	}
-
-	return &Github{
-		manager: m,
-		client:  githubClient,
-	}, err
-}
-
-// Scan will scan a github user or organization's repos.
-func (g *Github) Scan() {
-	ctx := context.Background()
-	listOptions := github.ListOptions{
-		PerPage: 100,
-		Page:    1,
-	}
-
-	var (
-		githubRepos []*github.Repository
-		auth        transport.AuthMethod
-	)
-
-	for {
-		var (
-			_githubRepos []*github.Repository
-			resp         *github.Response
-			err          error
-		)
-		if g.manager.Opts.User != "" {
-			_githubRepos, resp, err = g.client.Repositories.List(ctx, g.manager.Opts.User,
-				&github.RepositoryListOptions{ListOptions: listOptions})
-		} else if g.manager.Opts.Organization != "" {
-			_githubRepos, resp, err = g.client.Repositories.ListByOrg(ctx, g.manager.Opts.Organization,
-				&github.RepositoryListByOrgOptions{ListOptions: listOptions})
-		} else {
-			_githubRepos, resp, err = g.client.Repositories.List(ctx, "",
-				&github.RepositoryListOptions{ListOptions: listOptions})
-		}
-
-		for _, r := range _githubRepos {
-			if g.manager.Opts.ExcludeForks && r.GetFork() {
-				log.Debugf("excluding forked repo: %s", *r.Name)
-				continue
-			}
-			githubRepos = append(githubRepos, r)
-		}
-
-		if resp == nil {
-			break
-		}
-
-		if resp.LastPage != 0 {
-			log.Infof("gathering github repos... progress: page %d of %d", listOptions.Page, resp.LastPage)
-		} else {
-			log.Infof("gathering github repos... progress: page %d of %d", listOptions.Page, listOptions.Page)
-		}
-
-		listOptions.Page = resp.NextPage
-		if err != nil || listOptions.Page == 0 {
-			break
-		}
-	}
-
-	for _, repo := range githubRepos {
-		r := scan.NewRepo(g.manager)
-
-		if g.manager.CloneOptions != nil {
-			auth = g.manager.CloneOptions.Auth
-		}
-		err := r.Clone(&git.CloneOptions{
-			URL:  *repo.CloneURL,
-			Auth: auth,
-		})
-		r.Name = *repo.Name
-		if err != nil {
-			log.Warn("unable to clone via https and access token, attempting with ssh now")
-			auth, err := options.SSHAuth(g.manager.Opts)
-			if err != nil {
-				log.Warnf("unable to get ssh auth, skipping clone and scan for repo %s: %+v\n", *repo.CloneURL, err)
-				continue
-			}
-			err = r.Clone(&git.CloneOptions{
-				URL:  *repo.SSHURL,
-				Auth: auth,
-			})
-			if err != nil {
-				log.Warnf("err cloning %s, skipping clone and scan: %+v\n", *repo.SSHURL, err)
-				continue
-			}
-		}
-		if err = r.Scan(); err != nil {
-			log.Warn(err)
-		}
-	}
-}
-
-// ScanPR scan a single github PR
-func (g *Github) ScanPR() {
-	ctx := context.Background()
-	splits := strings.Split(g.manager.Opts.PullRequest, "/")
-	owner := splits[len(splits)-4]
-	repoName := splits[len(splits)-3]
-	prNum, err := strconv.Atoi(splits[len(splits)-1])
-	repo := scan.NewRepo(g.manager)
-	repo.Name = repoName
-	log.Infof("scanning pr %s\n", g.manager.Opts.PullRequest)
-
-	if err != nil {
-		return
-	}
-	page := 1
-	for {
-		commits, resp, err := g.client.PullRequests.ListCommits(ctx, owner, repoName, prNum, &github.ListOptions{
-			PerPage: 100, Page: page})
-		if err != nil {
-			return
-		}
-		for _, c := range commits {
-			c, _, err := g.client.Repositories.GetCommit(ctx, owner, repo.Name, *c.SHA)
-			if err != nil {
-				continue
-			}
-			commitObj := object.Commit{
-				Hash: plumbing.NewHash(*c.SHA),
-				Author: object.Signature{
-					Name:  *c.Commit.Author.Name,
-					Email: *c.Commit.Author.Email,
-					When:  *c.Commit.Author.Date,
-				},
-			}
-			for _, f := range c.Files {
-				if f.Patch == nil {
-					continue
-				}
-				repo.CheckRules(&scan.Bundle{
-					Content:  *f.Patch,
-					FilePath: *f.Filename,
-					Commit:   &commitObj,
-				})
-			}
-		}
-		page = resp.NextPage
-		if resp.LastPage == 0 {
-			break
-		}
-	}
-}

+ 0 - 112
hosts/gitlab.go

@@ -1,112 +0,0 @@
-package hosts
-
-import (
-	"context"
-	"sync"
-
-	"github.com/zricethezav/gitleaks/v6/manager"
-	"github.com/zricethezav/gitleaks/v6/options"
-	"github.com/zricethezav/gitleaks/v6/scan"
-
-	log "github.com/sirupsen/logrus"
-	"github.com/xanzy/go-gitlab"
-)
-
-// Gitlab wraps a gitlab client and manager. This struct implements what the Host interface defines.
-type Gitlab struct {
-	client  *gitlab.Client
-	manager *manager.Manager
-	ctx     context.Context
-	wg      sync.WaitGroup
-}
-
-// NewGitlabClient accepts a manager struct and returns a Gitlab host pointer which will be used to
-// perform a gitlab scan on an group or user.
-func NewGitlabClient(m *manager.Manager) (*Gitlab, error) {
-	var err error
-
-	gitlabClient := &Gitlab{
-		manager: m,
-		ctx:     context.Background(),
-		client:  gitlab.NewClient(nil, options.GetAccessToken(m.Opts)),
-	}
-
-	if m.Opts.BaseURL != "" {
-		err = gitlabClient.client.SetBaseURL(m.Opts.BaseURL)
-	}
-
-	return gitlabClient, err
-}
-
-// Scan will scan a github user or organization's repos.
-func (g *Gitlab) Scan() {
-	var (
-		projects []*gitlab.Project
-		resp     *gitlab.Response
-		err      error
-	)
-
-	page := 1
-	listOpts := gitlab.ListOptions{
-		PerPage: 100,
-		Page:    page,
-	}
-	for {
-		var _projects []*gitlab.Project
-		if g.manager.Opts.User != "" {
-			glOpts := &gitlab.ListProjectsOptions{
-				ListOptions: listOpts,
-			}
-			_projects, resp, err = g.client.Projects.ListUserProjects(g.manager.Opts.User, glOpts)
-
-		} else if g.manager.Opts.Organization != "" {
-			glOpts := &gitlab.ListGroupProjectsOptions{
-				ListOptions: listOpts,
-			}
-			_projects, resp, err = g.client.Groups.ListGroupProjects(g.manager.Opts.Organization, glOpts)
-		}
-		if err != nil {
-			log.Error(err)
-		}
-
-		for _, p := range _projects {
-			if g.manager.Opts.ExcludeForks && p.ForkedFromProject != nil {
-				log.Debugf("excluding forked repo: %s", p.Name)
-				continue
-			}
-			projects = append(projects, p)
-		}
-
-		if resp == nil {
-			break
-		}
-		if page >= resp.TotalPages {
-			// exit when we've seen all pages
-			break
-		}
-		page = resp.NextPage
-	}
-
-	// iterate of gitlab projects
-	for _, p := range projects {
-		r := scan.NewRepo(g.manager)
-		cloneOpts := g.manager.CloneOptions
-		cloneOpts.URL = p.HTTPURLToRepo
-		err := r.Clone(cloneOpts)
-		if err != nil {
-			log.Error(err)
-			continue
-		}
-		// TODO handle clone retry with ssh like github host
-		r.Name = p.Name
-
-		if err = r.Scan(); err != nil {
-			log.Error(err)
-		}
-	}
-}
-
-// ScanPR TODO not implemented
-func (g *Gitlab) ScanPR() {
-	log.Error("ScanPR is not implemented in Gitlab host yet...")
-}

+ 0 - 53
hosts/host.go

@@ -1,53 +0,0 @@
-package hosts
-
-import (
-	"strings"
-
-	"github.com/zricethezav/gitleaks/v6/manager"
-)
-
-const (
-	_github int = iota + 1
-	_gitlab
-)
-
-// Host is an interface used for defining external git hosting providers like github and gitlab.
-// TODO add bitbucket
-type Host interface {
-	Scan()
-	ScanPR()
-}
-
-// Run kicks off a host scan. This function accepts a manager and determines what host it should scan
-func Run(m *manager.Manager) error {
-	var host Host
-	var err error
-	switch getHost(m.Opts.Host) {
-	case _github:
-		host, err = NewGithubClient(m)
-	case _gitlab:
-		host, err = NewGitlabClient(m)
-	default:
-		return nil
-	}
-
-	if err != nil {
-		return err
-	}
-
-	if m.Opts.PullRequest != "" {
-		host.ScanPR()
-	} else {
-		host.Scan()
-	}
-	return err
-}
-
-func getHost(host string) int {
-	if strings.ToLower(host) == "github" {
-		return _github
-	} else if strings.ToLower(host) == "gitlab" {
-		return _gitlab
-	}
-	return -1
-}

+ 0 - 116
hosts/hosts_test.go

@@ -1,116 +0,0 @@
-package hosts
-
-import (
-	"flag"
-	"fmt"
-	"os"
-	"testing"
-
-	"github.com/zricethezav/gitleaks/v6/config"
-	"github.com/zricethezav/gitleaks/v6/manager"
-	"github.com/zricethezav/gitleaks/v6/options"
-)
-
-var (
-	integration = flag.Bool("integration", false, "run github/gitlab integration test")
-)
-
-func TestGithub(t *testing.T) {
-	flag.Parse()
-	if !*integration {
-		fmt.Println("skipping github integration tests")
-		return
-	}
-	if os.Getenv("GITHUB_TOKEN") == "" {
-		t.Log("skipping github integration tests, need env var GITLAB_TOKEN")
-		return
-	}
-
-	tests := []struct {
-		opts         options.Options
-		desiredLeaks int
-	}{
-		{
-			opts: options.Options{
-				Host:        "github",
-				User:        "gitleakstest",
-				AccessToken: os.Getenv("GITHUB_TOKEN"),
-			},
-			desiredLeaks: 2,
-		},
-		{
-			opts: options.Options{
-				Host:        "github",
-				PullRequest: "https://github.com/gitleakstest/gronit/pull/1",
-				AccessToken: os.Getenv("GITHUB_TOKEN"),
-			},
-			desiredLeaks: 4,
-		},
-	}
-
-	for _, test := range tests {
-		cfg, err := config.NewConfig(test.opts)
-		if err != nil {
-			t.Error(err)
-		}
-
-		m, err := manager.NewManager(test.opts, cfg)
-		if err != nil {
-			t.Error(err)
-		}
-		err = Run(m)
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		if test.desiredLeaks != len(m.GetLeaks()) {
-			t.Errorf("got %d leaks, want %d", len(m.GetLeaks()), test.desiredLeaks)
-		}
-	}
-}
-
-func TestGitlab(t *testing.T) {
-	flag.Parse()
-	if !*integration {
-		fmt.Println("skipping gitlab integration tests")
-		return
-	}
-	if os.Getenv("GITLAB_TOKEN") == "" {
-		t.Log("skipping github integration tests, need env var GITLAB_TOKEN")
-		return
-	}
-
-	tests := []struct {
-		opts         options.Options
-		desiredLeaks int
-	}{
-		{
-			opts: options.Options{
-				Host:        "gitlab",
-				User:        "gitleakstest",
-				AccessToken: os.Getenv("GITLAB_TOKEN"),
-			},
-			desiredLeaks: 2,
-		},
-	}
-
-	for _, test := range tests {
-		cfg, err := config.NewConfig(test.opts)
-		if err != nil {
-			t.Error(err)
-		}
-
-		m, err := manager.NewManager(test.opts, cfg)
-		if err != nil {
-			t.Error(err)
-		}
-		err = Run(m)
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		if test.desiredLeaks != len(m.GetLeaks()) {
-			t.Errorf("got %d leaks, want %d", len(m.GetLeaks()), test.desiredLeaks)
-		}
-	}
-}

+ 33 - 58
main.go

@@ -1,97 +1,72 @@
 package main
 
 import (
-	"io/ioutil"
 	"os"
+	"os/signal"
 	"time"
 
-	"github.com/zricethezav/gitleaks/v6/config"
-	"github.com/zricethezav/gitleaks/v6/hosts"
-	"github.com/zricethezav/gitleaks/v6/manager"
-	"github.com/zricethezav/gitleaks/v6/options"
-	"github.com/zricethezav/gitleaks/v6/scan"
+	"github.com/zricethezav/gitleaks/v7/report"
+
+	"github.com/zricethezav/gitleaks/v7/config"
+	"github.com/zricethezav/gitleaks/v7/options"
+	"github.com/zricethezav/gitleaks/v7/scan"
 
 	"github.com/hako/durafmt"
 	log "github.com/sirupsen/logrus"
 )
 
 func main() {
+	// this block sets up a go routine to listen for an interrupt signal
+	// which will immediately exit gitleaks
+	stopChan := make(chan os.Signal, 1)
+	signal.Notify(stopChan, os.Interrupt)
+	go listenForInterrupt(stopChan)
+
+	// setup options
 	opts, err := options.ParseOptions()
 	if err != nil {
 		log.Error(err)
-		os.Exit(options.ErrorEncountered)
+		os.Exit(1)
 	}
 
 	err = opts.Guard()
 	if err != nil {
 		log.Error(err)
-		os.Exit(options.ErrorEncountered)
+		os.Exit(1)
 	}
 
+	// setup configs
 	cfg, err := config.NewConfig(opts)
 	if err != nil {
 		log.Error(err)
-		os.Exit(options.ErrorEncountered)
+		os.Exit(1)
 	}
 
-	m, err := manager.NewManager(opts, cfg)
+	// setup scanner
+	scanner, err := scan.NewScanner(opts, cfg)
 	if err != nil {
 		log.Error(err)
-		os.Exit(options.ErrorEncountered)
+		os.Exit(1)
 	}
 
-	err = Run(m)
+	// run and time the scan
+	start := time.Now()
+	scannerReport, err := scanner.Scan()
+	log.Info("scan time: ", durafmt.Parse(time.Now().Sub(start)))
 	if err != nil {
 		log.Error(err)
-		os.Exit(options.ErrorEncountered)
+		os.Exit(1)
 	}
 
-	leaks := m.GetLeaks()
-	metadata := m.GetMetadata()
-
-	if len(m.GetLeaks()) != 0 {
-		if m.Opts.CheckUncommitted() {
-			log.Warnf("%d leaks detected in staged changes", len(leaks))
-		} else {
-			log.Warnf("%d leaks detected. %d commits scanned in %s", len(leaks),
-				metadata.Commits, durafmt.Parse(time.Duration(metadata.ScanTime)*time.Nanosecond))
-		}
-		os.Exit(options.LeaksPresent)
-	} else {
-		if m.Opts.CheckUncommitted() {
-			log.Infof("No leaks detected in staged changes")
-		} else {
-			log.Infof("No leaks detected. %d commits scanned in %s",
-				metadata.Commits, durafmt.Parse(time.Duration(metadata.ScanTime)*time.Nanosecond))
-		}
-		os.Exit(options.Success)
+	// report scan
+	if err := report.WriteReport(scannerReport, opts, cfg); err != nil {
+		log.Error(err)
+		os.Exit(1)
 	}
 }
 
-// Run begins the program and contains some basic logic on how to continue with the scan. If any external git host
-// options are set (like scanning a gitlab or github user) then a specific host client will be created and
-// then Scan() and Report() will be called. Otherwise, gitleaks will create a new repo and an scan will proceed.
-// If no options or the uncommitted option is set then a pre-commit scan will
-// take place -- this is similar to running `git diff` on all the tracked files.
-func Run(m *manager.Manager) error {
-	if m.Opts.Disk {
-		dir, err := ioutil.TempDir("", "gitleaks")
-		defer os.RemoveAll(dir)
-		if err != nil {
-			return err
-		}
-		m.CloneDir = dir
-	}
-
-	var err error
-	if m.Opts.Host != "" {
-		err = hosts.Run(m)
-	} else {
-		err = scan.Run(m)
-	}
-	if err != nil {
-		return err
-	}
-
-	return m.Report()
+func listenForInterrupt(stopScan chan os.Signal) {
+	<-stopScan
+	log.Warn("halting gitleaks scan")
+	os.Exit(1)
 }

+ 0 - 274
manager/manager.go

@@ -1,274 +0,0 @@
-package manager
-
-import (
-	"crypto/sha1"
-	"encoding/hex"
-	"encoding/json"
-	"fmt"
-	"os"
-	"os/signal"
-	"runtime"
-	"strings"
-	"sync"
-	"text/tabwriter"
-	"time"
-
-	"github.com/zricethezav/gitleaks/v6/config"
-	"github.com/zricethezav/gitleaks/v6/options"
-
-	"github.com/go-git/go-git/v5"
-	"github.com/hako/durafmt"
-	"github.com/mattn/go-colorable"
-	log "github.com/sirupsen/logrus"
-)
-
-const maxLineLen = 200
-
-// Manager is a struct containing options and configs as well CloneOptions and CloneDir.
-// This struct is passed into each NewRepo so we are not passing around the manager in func params.
-type Manager struct {
-	Opts   options.Options
-	Config config.Config
-
-	CloneOptions *git.CloneOptions
-	CloneDir     string
-
-	leaks     []Leak
-	leakChan  chan Leak
-	leakWG    *sync.WaitGroup
-	leakCache map[string]bool
-
-	stopChan chan os.Signal
-	metadata Metadata
-	metaWG   *sync.WaitGroup
-}
-
-// Leak is a struct that contains information about some line of code that contains
-// sensitive information as determined by the rules set in a gitleaks config
-type Leak struct {
-	Line       string    `json:"line"`
-	LineNumber int       `json:"lineNumber"`
-	Offender   string    `json:"offender"`
-	Commit     string    `json:"commit"`
-	Repo       string    `json:"repo"`
-	Rule       string    `json:"rule"`
-	Message    string    `json:"commitMessage"`
-	Author     string    `json:"author"`
-	Email      string    `json:"email"`
-	File       string    `json:"file"`
-	Date       time.Time `json:"date"`
-	Tags       string    `json:"tags"`
-	Operation  string    `json:"operation"`
-	lookupHash string
-}
-
-// ScanTime is a type used to determine total scan time
-type ScanTime int64
-
-// PatchTime is a type used to determine total patch time during an scan
-type PatchTime int64
-
-// CloneTime is a type used to determine total clone time
-type CloneTime int64
-
-// RegexTime is a type used to determine the time each rules' regex takes. This is especially useful
-// if you notice that gitleaks is taking a long time. You can use --debug to see the output of the regexTime
-// so you can determine which regex is not performing well.
-type RegexTime struct {
-	Time  int64
-	Regex string
-}
-
-// Metadata is a struct used to communicate metadata about an scan like timings and total commit counts.
-type Metadata struct {
-	mux  *sync.Mutex
-	data map[string]interface{}
-
-	timings chan interface{}
-
-	RegexTime map[string]int64
-	Commits   int
-	ScanTime  int64
-	patchTime int64
-	cloneTime int64
-}
-
-func init() {
-	log.SetOutput(os.Stdout)
-	log.SetFormatter(&log.TextFormatter{
-		ForceColors:   true,
-		FullTimestamp: true,
-	})
-	// Fix colors on Windows
-	if runtime.GOOS == "windows" {
-		log.SetOutput(colorable.NewColorableStdout())
-	}
-}
-
-// NewManager accepts options and returns a manager struct. The manager is a container for gitleaks configurations,
-// options and channel receivers.
-func NewManager(opts options.Options, cfg config.Config) (*Manager, error) {
-	cloneOpts, err := opts.CloneOptions()
-	if err != nil {
-		return nil, err
-	}
-
-	m := &Manager{
-		Opts:         opts,
-		Config:       cfg,
-		CloneOptions: cloneOpts,
-
-		stopChan:  make(chan os.Signal, 1),
-		leakChan:  make(chan Leak),
-		leakWG:    &sync.WaitGroup{},
-		leakCache: make(map[string]bool),
-		metaWG:    &sync.WaitGroup{},
-		metadata: Metadata{
-			RegexTime: make(map[string]int64),
-			timings:   make(chan interface{}),
-			data:      make(map[string]interface{}),
-			mux:       new(sync.Mutex),
-		},
-	}
-
-	signal.Notify(m.stopChan, os.Interrupt)
-
-	// start receiving leaks and metadata
-	go m.receiveLeaks()
-	go m.receiveMetadata()
-	go m.receiveInterrupt()
-
-	return m, nil
-}
-
-// GetLeaks returns all available leaks
-func (manager *Manager) GetLeaks() []Leak {
-	// need to wait for any straggling leaks
-	manager.leakWG.Wait()
-	return manager.leaks
-}
-
-// SendLeaks accepts a leak and is used by the scan pkg. This is the public function
-// that allows other packages to send leaks to the manager.
-func (manager *Manager) SendLeaks(l Leak) {
-	if len(l.Line) > maxLineLen {
-		l.Line = l.Line[0:maxLineLen-1] + "..."
-	}
-	if len(l.Offender) > maxLineLen {
-		l.Offender = l.Offender[0:maxLineLen-1] + "..."
-	}
-	h := sha1.New()
-	h.Write([]byte(l.Commit + l.Offender + l.File + l.Line + string(l.LineNumber)))
-	l.lookupHash = hex.EncodeToString(h.Sum(nil))
-	if manager.Opts.Redact {
-		l.Line = strings.ReplaceAll(l.Line, l.Offender, "REDACTED")
-		l.Offender = "REDACTED"
-	}
-	manager.leakWG.Add(1)
-	manager.leakChan <- l
-}
-
-func (manager *Manager) alreadySeen(leak Leak) bool {
-	if _, ok := manager.leakCache[leak.lookupHash]; ok {
-		return true
-	}
-	manager.leakCache[leak.lookupHash] = true
-	return false
-}
-
-// receiveLeaks listens to leakChan for incoming leaks. If any are received, they are appended to the
-// manager's leaks for future reporting. If the -v/--verbose option is set the leaks will marshaled into
-// json and printed out.
-func (manager *Manager) receiveLeaks() {
-	for leak := range manager.leakChan {
-		if manager.alreadySeen(leak) {
-			manager.leakWG.Done()
-			continue
-		}
-		manager.leaks = append(manager.leaks, leak)
-		if manager.Opts.Verbose {
-			var b []byte
-			if manager.Opts.PrettyPrint {
-				b, _ = json.MarshalIndent(leak, "", "	")
-			} else {
-				b, _ = json.Marshal(leak)
-			}
-			fmt.Println(string(b))
-		}
-		manager.leakWG.Done()
-	}
-}
-
-// GetMetadata returns the metadata. TODO this may not need to be private
-func (manager *Manager) GetMetadata() Metadata {
-	manager.metaWG.Wait()
-	return manager.metadata
-}
-
-// receiveMetadata is where the messages sent to the metadata channel get consumed. You can view metadata
-// by running gitleaks with the --debug option set. This is extremely useful when trying to optimize regular
-// expressions as that what gitleaks spends most of its cycles on.
-func (manager *Manager) receiveMetadata() {
-	for t := range manager.metadata.timings {
-		switch ti := t.(type) {
-		case CloneTime:
-			manager.metadata.cloneTime += int64(ti)
-		case ScanTime:
-			manager.metadata.ScanTime += int64(ti)
-		case PatchTime:
-			manager.metadata.patchTime += int64(ti)
-		case RegexTime:
-			manager.metadata.RegexTime[ti.Regex] = manager.metadata.RegexTime[ti.Regex] + ti.Time
-		}
-		manager.metaWG.Done()
-	}
-}
-
-// IncrementCommits increments total commits during an scan by i.
-func (manager *Manager) IncrementCommits(i int) {
-	manager.metadata.mux.Lock()
-	manager.metadata.Commits += i
-	manager.metadata.mux.Unlock()
-}
-
-// RecordTime accepts an interface and sends it to the manager's time channel
-func (manager *Manager) RecordTime(t interface{}) {
-	manager.metaWG.Add(1)
-	manager.metadata.timings <- t
-}
-
-// DebugOutput logs metadata and other messages that occurred during a gitleaks scan
-func (manager *Manager) DebugOutput() {
-	log.Debugf("-------------------------\n")
-	log.Debugf("| Times and Commit Counts|\n")
-	log.Debugf("-------------------------\n")
-	fmt.Println("totalScanTime: ", durafmt.Parse(time.Duration(manager.metadata.ScanTime)*time.Nanosecond))
-	fmt.Println("totalPatchTime: ", durafmt.Parse(time.Duration(manager.metadata.patchTime)*time.Nanosecond))
-	fmt.Println("totalCloneTime: ", durafmt.Parse(time.Duration(manager.metadata.cloneTime)*time.Nanosecond))
-	fmt.Println("totalCommits: ", manager.metadata.Commits)
-
-	const padding = 6
-	w := tabwriter.NewWriter(os.Stdout, 0, 0, padding, '.', 0)
-
-	log.Debugf("--------------------------\n")
-	log.Debugf("| Individual Regexes Times |\n")
-	log.Debugf("--------------------------\n")
-	for k, v := range manager.metadata.RegexTime {
-		_, _ = fmt.Fprintf(w, "%s\t%s\n", k, durafmt.Parse(time.Duration(v)*time.Nanosecond))
-	}
-	_ = w.Flush()
-
-}
-
-
-func (manager *Manager) receiveInterrupt() {
-	<-manager.stopChan
-	if manager.Opts.Report != "" {
-		err := manager.Report()
-		if err != nil {
-			log.Error(err)
-		}
-	}
-	log.Info("gitleaks received interrupt, stopping scan")
-	os.Exit(options.ErrorEncountered)
-}

+ 0 - 106
manager/manager_test.go

@@ -1,106 +0,0 @@
-package manager
-
-import (
-	"crypto/rand"
-	"fmt"
-	"github.com/zricethezav/gitleaks/v6/config"
-	"github.com/zricethezav/gitleaks/v6/options"
-	"io"
-	"testing"
-)
-
-// TODO
-// add more substantial tests... but since literally every pkg uses manager
-// these tests are kind of redundant
-func TestSendReceiveLeaks(t *testing.T) {
-
-	tests := []struct {
-		leaksToAdd int
-		goRoutines int
-	}{
-		{
-			leaksToAdd: 10,
-		},
-		{
-			leaksToAdd: 1000,
-		},
-	}
-	for _, test := range tests {
-		opts := options.Options{}
-		cfg, _ := config.NewConfig(opts)
-		m, _ := NewManager(opts, cfg)
-
-		for i := 0; i < test.leaksToAdd; i++ {
-			// we are testing the sync of sending/receiving leaks so we need
-			// the hash generation in sendLeaks to be unique for each iteration
-			// so I'm just setting the offender string as a uuid
-			m.SendLeaks(Leak{Offender: newUUID()})
-		}
-		got := m.GetLeaks()
-		if len(got) != test.leaksToAdd {
-			t.Errorf("got %d, wanted %d leaks", len(got), test.leaksToAdd)
-		}
-	}
-}
-
-func TestSendReceiveMeta(t *testing.T) {
-	tests := []struct {
-		scanTime   int64
-		patchTime  int64
-		cloneTime  int64
-		regexTime  int64
-		iterations int
-	}{
-		{
-			scanTime:   1000,
-			patchTime:  1000,
-			cloneTime:  1000,
-			regexTime:  1000,
-			iterations: 100,
-		},
-	}
-	for _, test := range tests {
-		opts := options.Options{}
-		cfg, _ := config.NewConfig(opts)
-		m, _ := NewManager(opts, cfg)
-
-		for i := 0; i < test.iterations; i++ {
-			m.RecordTime(ScanTime(test.scanTime))
-			m.RecordTime(PatchTime(test.patchTime))
-			m.RecordTime(CloneTime(test.cloneTime))
-			m.RecordTime(RegexTime{
-				Regex: "regex",
-				Time:  test.regexTime,
-			})
-			m.RecordTime(RegexTime{
-				Regex: "regex2",
-				Time:  test.regexTime,
-			})
-		}
-		md := m.GetMetadata()
-		if md.cloneTime != test.cloneTime*int64(test.iterations) {
-			t.Errorf("clone time mismatch, got %d, wanted %d",
-				md.cloneTime, test.cloneTime*int64(test.iterations))
-		}
-		if md.ScanTime != test.scanTime*int64(test.iterations) {
-			t.Errorf("scan time mismatch, got %d, wanted %d",
-				md.ScanTime, test.scanTime*int64(test.iterations))
-		}
-		if md.patchTime != test.patchTime*int64(test.iterations) {
-			t.Errorf("clone time mismatch, got %d, wanted %d",
-				md.patchTime, test.patchTime*int64(test.iterations))
-		}
-	}
-}
-
-// newUUID generates a random UUID according to RFC 4122
-// Ripped from https://play.golang.org/p/4FkNSiUDMg
-func newUUID() string {
-	uuid := make([]byte, 16)
-	io.ReadFull(rand.Reader, uuid)
-	// variant bits; see section 4.1.1
-	uuid[8] = uuid[8]&^0xc0 | 0x80
-	// version 4 (pseudo-random); see section 4.1.3
-	uuid[6] = uuid[6]&^0xf0 | 0x40
-	return fmt.Sprintf("%x-%x-%x-%x-%x", uuid[0:4], uuid[4:6], uuid[6:8], uuid[8:10], uuid[10:])
-}

+ 0 - 78
manager/report.go

@@ -1,78 +0,0 @@
-package manager
-
-import (
-	"encoding/csv"
-	"encoding/json"
-	"os"
-	"time"
-
-	"github.com/zricethezav/gitleaks/v6/version"
-
-	log "github.com/sirupsen/logrus"
-)
-
-// Report saves gitleaks leaks to a json specified by --report={report.json}
-func (manager *Manager) Report() error {
-	close(manager.leakChan)
-	close(manager.metadata.timings)
-
-	if log.IsLevelEnabled(log.DebugLevel) {
-		manager.DebugOutput()
-	}
-
-	if manager.Opts.Report != "" {
-		if len(manager.GetLeaks()) == 0 {
-			log.Infof("no leaks found, skipping writing report")
-			return nil
-		}
-		file, err := os.Create(manager.Opts.Report)
-		if err != nil {
-			return err
-		}
-
-		switch manager.Opts.ReportFormat {
-		case "json":
-			encoder := json.NewEncoder(file)
-			encoder.SetIndent("", " ")
-			err = encoder.Encode(manager.leaks)
-			if err != nil {
-				return err
-			}
-		case "csv":
-			w := csv.NewWriter(file)
-			_ = w.Write([]string{"repo", "line", "commit", "offender", "rule", "tags", "commitMsg", "author", "email", "file", "date"})
-			for _, leak := range manager.GetLeaks() {
-				w.Write([]string{leak.Repo, leak.Line, leak.Commit, leak.Offender, leak.Rule, leak.Tags, leak.Message, leak.Author, leak.Email, leak.File, leak.Date.Format(time.RFC3339)})
-			}
-			w.Flush()
-		case "sarif":
-			s := Sarif{
-				Schema:  "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
-				Version: "2.1.0",
-				Runs: []Runs{
-					{
-						Tool: Tool{
-							Driver: Driver{
-								Name:            "Gitleaks",
-								SemanticVersion: version.Version,
-								Rules:           manager.configToRules(),
-							},
-						},
-						Results: manager.leaksToResults(),
-					},
-				},
-			}
-			encoder := json.NewEncoder(file)
-			encoder.SetIndent("", " ")
-			err = encoder.Encode(s)
-			if err != nil {
-				return err
-			}
-		}
-		_ = file.Close()
-
-		log.Infof("report written to %s", manager.Opts.Report)
-	}
-	return nil
-}
-

+ 74 - 119
options/options.go

@@ -7,69 +7,52 @@ import (
 	"os/user"
 	"strings"
 
-	"github.com/zricethezav/gitleaks/v6/version"
+	"github.com/zricethezav/gitleaks/v7/version"
 
 	"github.com/go-git/go-git/v5"
+	"github.com/go-git/go-git/v5/plumbing"
+	"github.com/go-git/go-git/v5/plumbing/transport"
 	"github.com/go-git/go-git/v5/plumbing/transport/http"
 	"github.com/go-git/go-git/v5/plumbing/transport/ssh"
 	"github.com/jessevdk/go-flags"
 	log "github.com/sirupsen/logrus"
 )
 
-// No leaks or early exit due to invalid options
-// This block defines the exit codes. Success
-const (
-	// No leaks or early exit due to invalid options
-	Success          = 0
-	LeaksPresent     = 1
-	ErrorEncountered = 2
-	donateMessage    = "👋 maintaining gitleaks takes a lot of work so consider sponsoring me or donating a little something\n❤️ https://github.com/sponsors/zricethezav\n💸 https://www.paypal.me/zricethezav\n₿  btc:3GndEzRZa6rJ8ZpkLureUcc5TDHMYfpDxn"
-)
-
 // Options stores values of command line options
 type Options struct {
-	Verbose         bool   `short:"v" long:"verbose" description:"Show verbose output from scan"`
-	Repo            string `short:"r" long:"repo" description:"Target repository"`
-	Config          string `long:"config" description:"config path"`
-	Disk            bool   `long:"disk" description:"Clones repo(s) to disk"`
-	Version         bool   `long:"version" description:"version number"`
-	Username        string `long:"username" description:"Username for git repo"`
-	Password        string `long:"password" description:"Password for git repo"`
-	AccessToken     string `long:"access-token" description:"Access token for git repo"`
-	FilesAtCommit   string `long:"files-at-commit" description:"sha of commit to scan all files at commit"`
-	Threads         int    `long:"threads" description:"Maximum number of threads gitleaks spawns"`
-	SSH             string `long:"ssh-key" description:"path to ssh key used for auth"`
-	Uncommited      bool   `long:"uncommitted" description:"run gitleaks on uncommitted code"`
-	RepoPath        string `long:"repo-path" description:"Path to repo"`
-	OwnerPath       string `long:"owner-path" description:"Path to owner directory (repos discovered)"`
-	Branch          string `long:"branch" description:"Branch to scan"`
-	Report          string `long:"report" description:"path to write json leaks file"`
-	ReportFormat    string `long:"report-format" default:"json" description:"json, csv, sarif"`
-	Redact          bool   `long:"redact" description:"redact secrets from log messages and leaks"`
-	Debug           bool   `long:"debug" description:"log debug messages"`
-	RepoConfig      bool   `long:"repo-config" description:"Load config from target repo. Config file must be \".gitleaks.toml\" or \"gitleaks.toml\""`
-	PrettyPrint     bool   `long:"pretty" description:"Pretty print json if leaks are present"`
+	Verbose        bool   `short:"v" long:"verbose" description:"Show verbose output from scan"`
+	RepoURL        string `short:"r" long:"repo-url" description:"Repository URL"`
+	Path           string `short:"p" long:"path" description:"Path to directory (repo if contains .git) or file"`
+	ConfigPath     string `short:"c" long:"config-path" description:"Path to config"`
+	RepoConfigPath string `long:"repo-config-path" description:"Path to gitleaks config relative to repo root"`
+	ClonePath      string `long:"clone-path" description:"Path to clone repo to disk"`
+	CleanUp        string `long:"clone-cleanup" description:"Deletes cloned repo after scan"`
+	Version        bool   `long:"version" description:"Version number"`
+	Username       string `long:"username" description:"Username for git repo"`
+	Password       string `long:"password" description:"Password for git repo"`
+	AccessToken    string `long:"access-token" description:"Access token for git repo"`
+	Threads        int    `long:"threads" description:"Maximum number of threads gitleaks spawns"`
+	SSH            string `long:"ssh-key" description:"Path to ssh key used for auth"`
+	Unstaged       bool   `long:"unstaged" description:"Run gitleaks on unstaged code"`
+	Branch         string `long:"branch" description:"Branch to scan"`
+	Redact         bool   `long:"redact" description:"Redact secrets from log messages and leaks"`
+	Debug          bool   `long:"debug" description:"Log debug messages"`
+	NoGit          bool   `long:"no-git" description:"Treat git repos as plain directories and scan those files"`
+
+	// Report Options
+	Report       string `short:"o" long:"report" description:"Report output path"`
+	ReportFormat string `short:"f" long:"format" default:"json" description:"JSON, CSV, SARIF"`
 
 	// Commit Options
-	Commit      string `long:"commit" description:"sha of commit to scan or \"latest\" to scan the last commit of the repository"`
-	Commits     string `long:"commits" description:"comma separated list of a commits to scan"`
-	CommitsFile string `long:"commits-file" description:"file of new line separated list of a commits to scan"`
-	CommitFrom  string `long:"commit-from" description:"Commit to start scan from"`
-	CommitTo    string `long:"commit-to" description:"Commit to stop scan"`
-	CommitSince string `long:"commit-since" description:"Scan commits more recent than a specific date. Ex: '2006-01-02' or '2006-01-02T15:04:05-0700' format."`
-	CommitUntil string `long:"commit-until" description:"Scan commits older than a specific date. Ex: '2006-01-02' or '2006-01-02T15:04:05-0700' format."`
-
-	Timeout         string `long:"timeout" description:"Time allowed per scan. Ex: 10us, 30s, 1m, 1h10m1s"`
-	Depth           int    `long:"depth" description:"Number of commits to scan"`
-	Deletion        bool   `long:"include-deletion" description:"Scan for patch deletions in addition to patch additions"`
-
-	// Hosts
-	Host         string `long:"host" description:"git hosting service like gitlab or github. Supported hosts include: Github, Gitlab"`
-	BaseURL      string `long:"baseurl" description:"Base URL for API requests. Defaults to the public GitLab or GitHub API, but can be set to a domain endpoint to use with a self hosted server."`
-	Organization string `long:"org" description:"organization to scan"`
-	User         string `long:"user" description:"user to scan"`
-	PullRequest  string `long:"pr" description:"pull/merge request url"`
-	ExcludeForks bool   `long:"exclude-forks" description:"scan excludes forks"`
+	FilesAtCommit string `long:"files-at-commit" description:"Sha of commit to scan all files at commit"`
+	Commit        string `long:"commit" description:"Sha of commit to scan or \"latest\" to scan the last commit of the repository"`
+	Commits       string `long:"commits" description:"Comma separated list of a commits to scan"`
+	CommitsFile   string `long:"commits-file" description:"Path to file of line separated list of commits to scan"`
+	CommitFrom    string `long:"commit-from" description:"Commit to start scan from"`
+	CommitTo      string `long:"commit-to" description:"Commit to stop scan"`
+	CommitSince   string `long:"commit-since" description:"Scan commits more recent than a specific date. Ex: '2006-01-02' or '2006-01-02T15:04:05-0700' format."`
+	CommitUntil   string `long:"commit-until" description:"Scan commits older than a specific date. Ex: '2006-01-02' or '2006-01-02T15:04:05-0700' format."`
+	Depth         int    `long:"depth" description:"Number of commits to scan"`
 }
 
 // ParseOptions is responsible for parsing options passed in by cli. An Options struct
@@ -85,7 +68,6 @@ func ParseOptions() (Options, error) {
 		if flagsErr, ok := err.(*flags.Error); ok && flagsErr.Type != flags.ErrHelp {
 			parser.WriteHelp(os.Stdout)
 		}
-		fmt.Println(donateMessage)
 		os.Exit(0)
 	}
 
@@ -95,7 +77,7 @@ func ParseOptions() (Options, error) {
 		} else {
 			fmt.Printf("%s\n", version.Version)
 		}
-		os.Exit(Success)
+		os.Exit(0)
 	}
 
 	if opts.Debug {
@@ -109,10 +91,7 @@ func ParseOptions() (Options, error) {
 // If invalid sets of options are present, a descriptive error will return
 // else nil is returned
 func (opts Options) Guard() error {
-	if !oneOrNoneSet(opts.Repo, opts.OwnerPath, opts.RepoPath, opts.Host) {
-		return fmt.Errorf("only one target option must can be set. target options: repo, owner-path, repo-path, host")
-	}
-	if !oneOrNoneSet(opts.Organization, opts.User, opts.PullRequest) {
+	if !oneOrNoneSet(opts.RepoURL, opts.Path) {
 		return fmt.Errorf("only one target option must can be set. target options: repo, owner-path, repo-path, host")
 	}
 	if !oneOrNoneSet(opts.AccessToken, opts.Password) {
@@ -140,60 +119,52 @@ func oneOrNoneSet(optStr ...string) bool {
 // Username/PW or AccessToken is available and the repo target is not using the
 // git protocol then the repo must be a available via no auth.
 func (opts Options) CloneOptions() (*git.CloneOptions, error) {
+	var err error
 	progress := ioutil.Discard
 	if opts.Verbose {
 		progress = os.Stdout
 	}
 
-	if strings.HasPrefix(opts.Repo, "git") {
+	cloneOpts := &git.CloneOptions{
+		URL:      opts.RepoURL,
+		Progress: progress,
+	}
+	if opts.Depth != 0 {
+		cloneOpts.Depth = opts.Depth
+	}
+	if opts.Branch != "" {
+		cloneOpts.ReferenceName = plumbing.NewBranchReferenceName(opts.Branch)
+	}
+
+	var auth transport.AuthMethod
+
+	if strings.HasPrefix(opts.RepoURL, "git") {
 		// using git protocol so needs ssh auth
-		auth, err := SSHAuth(opts)
+		auth, err = SSHAuth(opts)
 		if err != nil {
 			return nil, err
 		}
-		return &git.CloneOptions{
-			URL:      opts.Repo,
-			Auth:     auth,
-			Progress: progress,
-		}, nil
-	}
-	if opts.Password != "" && opts.Username != "" {
+	} else if opts.Password != "" && opts.Username != "" {
 		// auth using username and password
-		return &git.CloneOptions{
-			URL: opts.Repo,
-			Auth: &http.BasicAuth{
-				Username: opts.Username,
-				Password: opts.Password,
-			},
-			Progress: progress,
-		}, nil
-	}
-	if opts.AccessToken != "" {
-		return &git.CloneOptions{
-			URL: opts.Repo,
-			Auth: &http.BasicAuth{
-				Username: "gitleaks_user",
-				Password: opts.AccessToken,
-			},
-			Progress: progress,
-		}, nil
+		auth = &http.BasicAuth{
+			Username: opts.Username,
+			Password: opts.Password,
+		}
+	} else if opts.AccessToken != "" {
+		auth = &http.BasicAuth{
+			Username: "gitleaks_user",
+			Password: opts.AccessToken,
+		}
+	} else if os.Getenv("GITLEAKS_ACCESS_TOKEN") != "" {
+		auth = &http.BasicAuth{
+			Username: "gitleaks_user",
+			Password: os.Getenv("GITLEAKS_ACCESS_TOKEN"),
+		}
 	}
-	if os.Getenv("GITLEAKS_ACCESS_TOKEN") != "" {
-		return &git.CloneOptions{
-			URL: opts.Repo,
-			Auth: &http.BasicAuth{
-				Username: "gitleaks_user",
-				Password: os.Getenv("GITLEAKS_ACCESS_TOKEN"),
-			},
-			Progress: progress,
-		}, nil
+	if auth != nil {
+		cloneOpts.Auth = auth
 	}
-
-	// No Auth, publicly available
-	return &git.CloneOptions{
-		URL:      opts.Repo,
-		Progress: progress,
-	}, nil
+	return cloneOpts, nil
 }
 
 // SSHAuth tried to generate ssh public keys based on what was passed via cli. If no
@@ -215,7 +186,7 @@ func SSHAuth(opts Options) (*ssh.PublicKeys, error) {
 // OpenLocal checks what options are set, if no remote targets are set
 // then return true
 func (opts Options) OpenLocal() bool {
-	if opts.Uncommited || opts.RepoPath != "" || opts.Repo == "" {
+	if opts.Unstaged || opts.Path != "" || opts.RepoURL == "" {
 		return true
 	}
 	return false
@@ -225,33 +196,17 @@ func (opts Options) OpenLocal() bool {
 // or if gitleaks should check the entire git history
 func (opts Options) CheckUncommitted() bool {
 	// check to make sure no remote shit is set
-	if opts.Uncommited {
+	if opts.Unstaged {
 		return true
 	}
 	if opts == (Options{}) {
 		return true
 	}
-	if opts.Repo != "" {
+	if opts.RepoURL != "" {
 		return false
 	}
-	if opts.RepoPath != "" {
-		return false
-	}
-	if opts.OwnerPath != "" {
-		return false
-	}
-	if opts.Host != "" {
+	if opts.Path != "" {
 		return false
 	}
 	return true
 }
-
-// GetAccessToken accepts options and returns a string which is the access token to a git host.
-// Setting this option or environment var is necessary if performing an scan with any of the git hosting providers
-// in the host pkg. The access token set by cli options takes precedence over env vars.
-func GetAccessToken(opts Options) string {
-	if opts.AccessToken != "" {
-		return opts.AccessToken
-	}
-	return os.Getenv("GITLEAKS_ACCESS_TOKEN")
-}

+ 33 - 0
report/leak.go

@@ -0,0 +1,33 @@
+package report
+
+import (
+	"strings"
+	"time"
+)
+
+// Leak is a struct that contains information about some line of code that contains
+// sensitive information as determined by the rules set in a gitleaks config.
+//
+// Offender is the text that RedactLeak masks, both in the Offender field itself
+// and wherever it occurs inside Line. Each field is serialized under the JSON key
+// given in its tag; note that Message is emitted as "commitMessage".
+type Leak struct {
+	Line       string    `json:"line"`
+	LineNumber int       `json:"lineNumber"`
+	Offender   string    `json:"offender"`
+	Commit     string    `json:"commit"`
+	Repo       string    `json:"repo"`
+	RepoURL    string    `json:"repoURL"`
+	LeakURL    string    `json:"leakURL"`
+	Rule       string    `json:"rule"`
+	Message    string    `json:"commitMessage"`
+	Author     string    `json:"author"`
+	Email      string    `json:"email"`
+	File       string    `json:"file"`
+	Date       time.Time `json:"date"`
+	Tags       string    `json:"tags"`
+}
+
+// RedactLeak returns a copy of the given leak in which every occurrence of the
+// offending string inside Line, as well as the Offender field itself, has been
+// replaced with "REDACTED". The caller's value is untouched (leak is passed by value).
+func RedactLeak(leak Leak) Leak {
+	const mask = "REDACTED"
+	// Mask the line first: the substitution needs the original offender text.
+	maskedLine := strings.Replace(leak.Line, leak.Offender, mask, -1)
+	leak.Line, leak.Offender = maskedLine, mask
+	return leak
+}

+ 95 - 0
report/report.go

@@ -0,0 +1,95 @@
+package report
+
+import (
+	"encoding/csv"
+	"encoding/json"
+	"os"
+	"time"
+
+	"github.com/zricethezav/gitleaks/v7/config"
+	"github.com/zricethezav/gitleaks/v7/options"
+	"github.com/zricethezav/gitleaks/v7/version"
+
+	log "github.com/sirupsen/logrus"
+)
+
+// Report is a container for leaks and number of commits scanned
+type Report struct {
+	// Leaks holds the leaks collected by a scan.
+	Leaks   []Leak
+	// Commits is the number of commits the scan covered.
+	Commits int
+}
+
+// WriteReport logs a summary of the scan and, if --report has been set and at least
+// one leak was found, writes the leaks to the report file.
+//
+// The output format is selected by opts.ReportFormat: "json", "csv" or "sarif".
+// Any other value leaves the freshly created report file empty. When opts.Redact
+// is set, the leaks are passed through RedactLeak before being written.
+//
+// It returns an error if the report file cannot be created or if encoding or
+// writing the report fails.
+func WriteReport(report Report, opts options.Options, cfg config.Config) error {
+	if !(opts.NoGit || opts.CheckUncommitted()) {
+		log.Info("commits scanned: ", report.Commits)
+	}
+	if len(report.Leaks) == 0 {
+		log.Info("No leaks found")
+		return nil
+	}
+	log.Warn("leaks found: ", len(report.Leaks))
+
+	if opts.Report == "" {
+		return nil
+	}
+
+	if opts.Redact {
+		redactedLeaks := make([]Leak, 0, len(report.Leaks))
+		for _, leak := range report.Leaks {
+			redactedLeaks = append(redactedLeaks, RedactLeak(leak))
+		}
+		report.Leaks = redactedLeaks
+	}
+
+	file, err := os.Create(opts.Report)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	switch opts.ReportFormat {
+	case "json":
+		encoder := json.NewEncoder(file)
+		encoder.SetIndent("", " ")
+		return encoder.Encode(report.Leaks)
+	case "csv":
+		w := csv.NewWriter(file)
+		if err := w.Write([]string{"repo", "line", "commit", "offender", "leakURL", "rule", "tags", "commitMsg", "author", "email", "file", "date"}); err != nil {
+			return err
+		}
+		for _, leak := range report.Leaks {
+			if err := w.Write([]string{leak.Repo, leak.Line, leak.Commit, leak.Offender, leak.LeakURL, leak.Rule, leak.Tags, leak.Message, leak.Author, leak.Email, leak.File, leak.Date.Format(time.RFC3339)}); err != nil {
+				return err
+			}
+		}
+		// Flush has no return value; a failure while draining the buffer is
+		// only visible through Error, so surface it instead of dropping it.
+		w.Flush()
+		return w.Error()
+	case "sarif":
+		s := Sarif{
+			Schema:  "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
+			Version: "2.1.0",
+			Runs: []Runs{
+				{
+					Tool: Tool{
+						Driver: Driver{
+							Name:            "Gitleaks",
+							SemanticVersion: version.Version,
+							Rules:           configToRules(cfg),
+						},
+					},
+					Results: leaksToResults(report.Leaks),
+				},
+			},
+		}
+		encoder := json.NewEncoder(file)
+		encoder.SetIndent("", " ")
+		return encoder.Encode(s)
+	}
+
+	return nil
+}

+ 15 - 13
manager/sarif.go → report/sarif.go

@@ -1,8 +1,10 @@
-package manager
+package report
 
 import (
 	"fmt"
 	"time"
+
+	"github.com/zricethezav/gitleaks/v7/config"
 )
 
 //Sarif ...
@@ -24,8 +26,8 @@ type FullDescription struct {
 
 //Rules ...
 type Rules struct {
-	ID         string          `json:"id"`
-	Name       string          `json:"name"`
+	ID   string `json:"id"`
+	Name string `json:"name"`
 }
 
 //Driver ...
@@ -87,7 +89,6 @@ type ResultProperties struct {
 	Author        string    `json:"author"`
 	Email         string    `json:"email"`
 	CommitMessage string    `json:"commitMessage"`
-	Operation     string    `json:"gitOperation"`
 	Repo          string    `json:"repo"`
 }
 
@@ -97,9 +98,9 @@ type Runs struct {
 	Results []Results `json:"results"`
 }
 
-func (manager *Manager) configToRules() []Rules {
+func configToRules(cfg config.Config) []Rules {
 	var rules []Rules
-	for _, rule := range manager.Config.Rules {
+	for _, rule := range cfg.Rules {
 		rules = append(rules, Rules{
 			ID:   rule.Description,
 			Name: rule.Description,
@@ -108,9 +109,9 @@ func (manager *Manager) configToRules() []Rules {
 	return rules
 }
 
-func (manager *Manager) leaksToResults() []Results {
+func leaksToResults(leaks []Leak) []Results {
 	var results []Results
-	for _, leak := range manager.leaks {
+	for _, leak := range leaks {
 		results = append(results, Results{
 			Message: Message{
 				Text: fmt.Sprintf("%s secret detected", leak.Rule),
@@ -122,7 +123,6 @@ func (manager *Manager) leaksToResults() []Results {
 				Author:        leak.Author,
 				Email:         leak.Email,
 				CommitMessage: leak.Message,
-				Operation:     leak.Operation,
 				Repo:          leak.Repo,
 			},
 			Locations: leakToLocation(leak),
@@ -133,12 +133,15 @@ func (manager *Manager) leaksToResults() []Results {
 }
 
 func leakToLocation(leak Leak) []Locations {
+	uri := leak.File
+	if leak.LeakURL != "" {
+		uri = leak.LeakURL
+	}
 	return []Locations{
 		{
-			PhysicalLocation:
-			PhysicalLocation{
+			PhysicalLocation: PhysicalLocation{
 				ArtifactLocation: ArtifactLocation{
-					URI: leak.File,
+					URI: uri,
 				},
 				Region: Region{
 					StartLine: leak.LineNumber,
@@ -150,4 +153,3 @@ func leakToLocation(leak Leak) []Locations {
 		},
 	}
 }
-

+ 86 - 0
scan/commit.go

@@ -0,0 +1,86 @@
+package scan
+
+import (
+	"fmt"
+
+	"github.com/zricethezav/gitleaks/v7/report"
+
+	"github.com/go-git/go-git/v5"
+	fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
+	"github.com/go-git/go-git/v5/plumbing/object"
+)
+
+// CommitScanner is a commit scanner. It scans a single commit of a repository.
+type CommitScanner struct {
+	BaseScanner
+	// repo is the repository handed to NewCommitScanner.
+	repo     *git.Repository
+	// repoName is derived from the scanner options via getRepoName.
+	repoName string
+	// commit is the commit whose changes are scanned.
+	commit   *object.Commit
+}
+
+// NewCommitScanner builds a CommitScanner for the given commit of repo. The repo
+// name is derived from the base scanner's options and the scanner type is set to
+// typeCommitScanner.
+func NewCommitScanner(base BaseScanner, repo *git.Repository, commit *object.Commit) *CommitScanner {
+	scanner := CommitScanner{
+		BaseScanner: base,
+		repoName:    getRepoName(base.opts),
+		commit:      commit,
+		repo:        repo,
+	}
+	scanner.scannerType = typeCommitScanner
+	return &scanner
+}
+
+// Scan kicks off a CommitScanner Scan.
+//
+// A commit without parents is delegated to a FilesAtCommitScanner, which scans the
+// full content of its files. Otherwise a patch is generated against each parent and
+// only the added chunks of non-binary files are checked against the rules.
+//
+// The returned report always counts exactly one commit. An error is returned when
+// iterating the parents fails or when a patch cannot be generated; in the latter
+// case the underlying cause is included in the error message.
+func (cs *CommitScanner) Scan() (report.Report, error) {
+	var scannerReport report.Report
+	if len(cs.commit.ParentHashes) == 0 {
+		facScanner := NewFilesAtCommitScanner(cs.BaseScanner, cs.repo, cs.commit)
+		return facScanner.Scan()
+	}
+
+	err := cs.commit.Parents().ForEach(func(parent *object.Commit) error {
+		defer func() {
+			if err := recover(); err != nil {
+				// sometimes the Patch generation will fail due to a known bug in
+				// sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
+				// Once a fix has been merged I will remove this recover.
+				return
+			}
+		}()
+		if parent == nil {
+			return nil
+		}
+
+		patch, err := parent.Patch(cs.commit)
+		if err != nil {
+			// keep the cause so callers can tell why the patch could not be built
+			return fmt.Errorf("could not generate Patch: %v", err)
+		}
+
+		// patchContent is used for searching for leak line number
+		patchContent := patch.String()
+
+		for _, f := range patch.FilePatches() {
+			if f.IsBinary() {
+				continue
+			}
+			for _, chunk := range f.Chunks() {
+				if chunk.Type() != fdiff.Add {
+					continue
+				}
+				_, to := f.Files()
+				leaks := checkRules(cs.BaseScanner, cs.commit, cs.repoName, to.Path(), chunk.Content())
+
+				lineLookup := make(map[string]bool)
+				for _, leak := range leaks {
+					leak.LineNumber = extractLine(patchContent, leak, lineLookup)
+					leak.LeakURL = leakURL(leak)
+					scannerReport.Leaks = append(scannerReport.Leaks, leak)
+					if cs.opts.Verbose {
+						logLeak(leak, cs.opts.Redact)
+					}
+				}
+			}
+		}
+		return nil
+	})
+	scannerReport.Commits = 1
+	return scannerReport, err
+}

+ 44 - 0
scan/commits.go

@@ -0,0 +1,44 @@
+package scan
+
+import (
+	"github.com/go-git/go-git/v5"
+	"github.com/zricethezav/gitleaks/v7/report"
+)
+
+// CommitsScanner is a commit scanner that works on a list of commits, scanning
+// each one in turn with a CommitScanner.
+type CommitsScanner struct {
+	BaseScanner
+
+	// repo is the repository the commits are looked up in.
+	repo     *git.Repository
+	// repoName is derived from the scanner options via getRepoName.
+	repoName string
+	// commits are the commit identifiers passed to obtainCommit, one per scan.
+	commits  []string
+}
+
+// NewCommitsScanner builds a scanner for a list of commits (plural, as opposed to
+// NewCommitScanner). The repo name is derived from the base scanner's options.
+func NewCommitsScanner(base BaseScanner, repo *git.Repository, commits []string) *CommitsScanner {
+	scanner := CommitsScanner{
+		BaseScanner: base,
+		repoName:    getRepoName(base.opts),
+		commits:     commits,
+		repo:        repo,
+	}
+	return &scanner
+}
+
+// Scan kicks off a CommitsScanner Scan.
+//
+// Every entry of the commit list is resolved with obtainCommit and scanned with a
+// CommitScanner; the leaks of all commits are merged into one report and Commits
+// counts the commits scanned successfully.
+//
+// The scan stops at the first commit that cannot be resolved or scanned. The report
+// gathered so far is returned together with that error.
+func (css *CommitsScanner) Scan() (report.Report, error) {
+	var scannerReport report.Report
+	for _, hash := range css.commits {
+		commit, err := obtainCommit(css.repo, hash)
+		if err != nil {
+			// propagate the lookup failure instead of reporting a clean scan
+			return scannerReport, err
+		}
+		cs := NewCommitScanner(css.BaseScanner, css.repo, commit)
+		commitReport, err := cs.Scan()
+		if err != nil {
+			return scannerReport, err
+		}
+		scannerReport.Leaks = append(scannerReport.Leaks, commitReport.Leaks...)
+		scannerReport.Commits++
+	}
+	return scannerReport, nil
+}

+ 60 - 0
scan/filesatcommit.go

@@ -0,0 +1,60 @@
+package scan
+
+import (
+	"github.com/go-git/go-git/v5"
+	"github.com/go-git/go-git/v5/plumbing/object"
+	"github.com/zricethezav/gitleaks/v7/report"
+)
+
+// FilesAtCommitScanner is a files at commit scanner. This differs from CommitScanner
+// as CommitScanner generates patches that are scanned. FilesAtCommitScanner instead looks at
+// files available at a commit's worktree and scans the entire content of said files.
+// Apologies for the awful struct name...
+type FilesAtCommitScanner struct {
+	BaseScanner
+
+	// repo is the repository handed to NewFilesAtCommitScanner.
+	repo     *git.Repository
+	// commit is the commit whose files are scanned.
+	commit   *object.Commit
+	// repoName is derived from the scanner options via getRepoName.
+	repoName string
+}
+
+// NewFilesAtCommitScanner builds a FilesAtCommitScanner for the given commit of
+// repo. The repo name is derived from the base scanner's options and the scanner
+// type is set to typeFilesAtCommitScanner.
+func NewFilesAtCommitScanner(base BaseScanner, repo *git.Repository, commit *object.Commit) *FilesAtCommitScanner {
+	scanner := FilesAtCommitScanner{
+		BaseScanner: base,
+		repoName:    getRepoName(base.opts),
+		commit:      commit,
+		repo:        repo,
+	}
+	scanner.scannerType = typeFilesAtCommitScanner
+	return &scanner
+}
+
+// Scan kicks off a FilesAtCommitScanner Scan. Every non-binary file present at the
+// commit is read in full and checked against the rules. The returned report counts
+// one commit; iteration stops at the first file that cannot be inspected or read.
+func (fs *FilesAtCommitScanner) Scan() (report.Report, error) {
+	var scannerReport report.Report
+
+	files, err := fs.commit.Files()
+	if err != nil {
+		return scannerReport, err
+	}
+
+	scanFile := func(file *object.File) error {
+		isBinary, binErr := file.IsBinary()
+		if isBinary {
+			// binary files are skipped without being read
+			return nil
+		}
+		if binErr != nil {
+			return binErr
+		}
+
+		text, readErr := file.Contents()
+		if readErr != nil {
+			return readErr
+		}
+
+		found := checkRules(fs.BaseScanner, fs.commit, fs.repoName, file.Name, text)
+		scannerReport.Leaks = append(scannerReport.Leaks, found...)
+		return nil
+	}
+	err = files.ForEach(scanFile)
+
+	scannerReport.Commits = 1
+	return scannerReport, err
+}

+ 103 - 0
scan/nogit.go

@@ -0,0 +1,103 @@
+package scan
+
+import (
+	"bufio"
+	"context"
+	"os"
+	"path/filepath"
+	"sync"
+
+	"github.com/zricethezav/gitleaks/v7/report"
+
+	"golang.org/x/sync/errgroup"
+)
+
+// NoGitScanner is a scanner that absolutely despises git. It scans the regular
+// files found under the configured path without consulting any git metadata.
+type NoGitScanner struct {
+	BaseScanner
+	// leakChan carries leaks from the scanning goroutines to receiveLeaks.
+	leakChan chan report.Leak
+	// leakWG counts leaks that were sent on leakChan but not yet stored in leaks.
+	leakWG   *sync.WaitGroup
+	// leaks is appended to by receiveLeaks.
+	leaks    []report.Leak
+}
+
+// NewNoGitScanner builds a nogit scanner, used for scanning files and directories.
+// It also starts the goroutine that collects the leaks sent by Scan.
+func NewNoGitScanner(base BaseScanner) *NoGitScanner {
+	scanner := &NoGitScanner{
+		BaseScanner: base,
+		leakWG:      &sync.WaitGroup{},
+		leakChan:    make(chan report.Leak),
+	}
+	scanner.scannerType = typeNoGitScanner
+
+	// collector for leaks published on leakChan
+	go scanner.receiveLeaks()
+
+	return scanner
+}
+
+// Scan kicks off a NoGitScanner Scan.
+//
+// The directory tree rooted at the configured path is walked and every regular file
+// is scanned line by line, with the number of concurrently scanned files bounded by
+// howManyThreads. Leaks carry the 1-based line number they were found on.
+//
+// The report contains every leak that was collected. If walking the tree or
+// opening/closing a file failed, the first such error is returned alongside the
+// report instead of being silently dropped.
+func (ngs *NoGitScanner) Scan() (report.Report, error) {
+	var scannerReport report.Report
+
+	g, _ := errgroup.WithContext(context.Background())
+	paths := make(chan string)
+	semaphore := make(chan bool, howManyThreads(ngs.opts.Threads))
+
+	g.Go(func() error {
+		defer close(paths)
+		return filepath.Walk(ngs.opts.Path,
+			func(path string, fInfo os.FileInfo, err error) error {
+				if err != nil {
+					return err
+				}
+				if fInfo.Mode().IsRegular() {
+					paths <- path
+				}
+				return nil
+			})
+	})
+
+	for path := range paths {
+		p := path
+		semaphore <- true
+		g.Go(func() error {
+			defer func() {
+				<-semaphore
+			}()
+			f, err := os.Open(p)
+			if err != nil {
+				return err
+			}
+			scanner := bufio.NewScanner(f)
+			line := 0
+			for scanner.Scan() {
+				line++
+				leaks := checkRules(ngs.BaseScanner, emptyCommit(), "", f.Name(), scanner.Text())
+				for _, leak := range leaks {
+					leak.LineNumber = line
+					if ngs.opts.Verbose {
+						logLeak(leak, ngs.opts.Redact)
+					}
+					ngs.leakWG.Add(1)
+					ngs.leakChan <- leak
+				}
+			}
+			return f.Close()
+		})
+	}
+
+	// Wait blocks until the walker and every file goroutine have returned and
+	// yields the first non-nil error any of them produced.
+	err := g.Wait()
+	// make sure receiveLeaks has stored every leak before reading ngs.leaks
+	ngs.leakWG.Wait()
+	scannerReport.Leaks = ngs.leaks
+
+	return scannerReport, err
+}
+
+// receiveLeaks stores every leak arriving on leakChan and marks it as handled on
+// leakWG. It returns once leakChan is closed.
+func (ngs *NoGitScanner) receiveLeaks() {
+	for {
+		leak, open := <-ngs.leakChan
+		if !open {
+			return
+		}
+		ngs.leaks = append(ngs.leaks, leak)
+		ngs.leakWG.Done()
+	}
+}

+ 69 - 0
scan/parent.go

@@ -0,0 +1,69 @@
+package scan
+
+import (
+	"io/ioutil"
+	"path/filepath"
+
+	"github.com/zricethezav/gitleaks/v7/report"
+
+	"github.com/go-git/go-git/v5"
+	log "github.com/sirupsen/logrus"
+)
+
+// ParentScanner is a parent directory scanner. Its Scan looks at the directories
+// directly inside the configured path and scans each one that is a git repository.
+type ParentScanner struct {
+	BaseScanner
+}
+
+// NewParentScanner builds a ParentScanner on top of base and tags it with the
+// typeDirScanner scanner type.
+func NewParentScanner(base BaseScanner) *ParentScanner {
+	scanner := ParentScanner{BaseScanner: base}
+	scanner.scannerType = typeDirScanner
+	return &scanner
+}
+
+// Scan kicks off a ParentScanner scan. This uses the directory from --path to discover repos.
+//
+// Each sub-directory of the path is opened as a git repository. Directories that are
+// not repositories and repositories whose directory name matches an entry of the
+// repo allowlist are skipped. Every remaining repository is scanned with a
+// RepoScanner and the results are merged into one report.
+//
+// The scan aborts with an error when the path cannot be listed, when a directory
+// fails to open for a reason other than not being a repository, or when a repo scan fails.
+func (ds *ParentScanner) Scan() (report.Report, error) {
+	var scannerReport report.Report
+	log.Debugf("scanning repos in %s\n", ds.opts.Path)
+
+	files, err := ioutil.ReadDir(ds.opts.Path)
+	if err != nil {
+		return scannerReport, err
+	}
+	for _, f := range files {
+		if !f.IsDir() {
+			continue
+		}
+
+		repo, err := git.PlainOpen(filepath.Join(ds.opts.Path, f.Name()))
+		if err != nil {
+			// compare against the sentinel rather than its message text
+			if err == git.ErrRepositoryNotExists {
+				log.Debugf("%s is not a git repository", f.Name())
+				continue
+			}
+			return scannerReport, err
+		}
+		skip := false
+		for _, allowListedRepo := range ds.cfg.Allowlist.Repos {
+			if regexMatched(f.Name(), allowListedRepo) {
+				skip = true
+				// one match is enough, no need to test the remaining entries
+				break
+			}
+		}
+		if skip {
+			continue
+		}
+
+		rs := NewRepoScanner(ds.BaseScanner, repo)
+		rs.repoName = f.Name()
+		repoReport, err := rs.Scan()
+		if err != nil {
+			return scannerReport, err
+		}
+		scannerReport.Leaks = append(scannerReport.Leaks, repoReport.Leaks...)
+		scannerReport.Commits += repoReport.Commits
+	}
+	return scannerReport, nil
+}

+ 120 - 261
scan/repo.go

@@ -1,304 +1,163 @@
 package scan
 
 import (
-	"context"
-	"crypto/md5"
-	"fmt"
-	"io/ioutil"
-	"os"
-	"path"
-	"path/filepath"
-	"runtime"
-	"time"
+	"sync"
 
-	"github.com/zricethezav/gitleaks/v6/config"
-	"github.com/zricethezav/gitleaks/v6/manager"
+	"github.com/zricethezav/gitleaks/v7/report"
 
-	"github.com/BurntSushi/toml"
-	"github.com/go-git/go-billy/v5"
 	"github.com/go-git/go-git/v5"
-	"github.com/go-git/go-git/v5/plumbing"
-	"github.com/go-git/go-git/v5/storage/memory"
+	fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
+	"github.com/go-git/go-git/v5/plumbing/object"
+	"github.com/go-git/go-git/v5/plumbing/storer"
 	log "github.com/sirupsen/logrus"
 )
 
-// Repo wraps a *git.Repository object in addition to a manager object and the name of the repo.
-// Commits are inspected from the *git.Repository object. If a Commit is found then we send it
-// via the manager LeakChan where the manager receives and keeps track of all leaks.
-type Repo struct {
-	*git.Repository
+// RepoScanner is a repo scanner
+type RepoScanner struct {
+	BaseScanner
+	repo     *git.Repository
+	repoName string
 
-	// config is used when the --repo-config option is set.
-	// This allows users to load up configs specific to their repos.
-	// Imagine the scenario where you are doing an scan of a large organization
-	// and you want certain repos to look for specific rules. If those specific repos
-	// have a gitleaks.toml or .gitleaks.toml config then those configs will be used specifically
-	// for those repo scans.
-	config config.Config
-
-	// ctx is used to signal timeouts to running goroutines
-	ctx    context.Context
-	cancel context.CancelFunc
-
-	Name    string
-	Manager *manager.Manager
+	leakChan  chan report.Leak
+	leakWG    *sync.WaitGroup
+	leakCache map[string]bool
+	leaks     []report.Leak
 }
 
-// NewRepo initializes and returns a Repo struct.
-func NewRepo(m *manager.Manager) *Repo {
-	return &Repo{
-		Manager: m,
-		config:  m.Config,
-		ctx:     context.Background(),
+// NewRepoScanner returns a new repo scanner (go figure). This function also
+// sets up the leak listener for multi-threaded awesomeness.
+func NewRepoScanner(base BaseScanner, repo *git.Repository) *RepoScanner {
+	rs := &RepoScanner{
+		BaseScanner: base,
+		repo:        repo,
+		leakChan:    make(chan report.Leak),
+		leakWG:      &sync.WaitGroup{},
+		leakCache:   make(map[string]bool),
+		repoName:    getRepoName(base.opts),
 	}
-}
 
-// Run accepts a manager and begins an scan based on the options/configs set in the manager.
-func Run(m *manager.Manager) error {
-	if m.Opts.OwnerPath != "" {
-		files, err := ioutil.ReadDir(m.Opts.OwnerPath)
-		if err != nil {
-			return err
-		}
-		for _, f := range files {
-			if !f.IsDir() {
-				continue
-			}
-			m.Opts.RepoPath = fmt.Sprintf("%s/%s", m.Opts.OwnerPath, f.Name())
-			if err := runHelper(NewRepo(m)); err != nil {
-				log.Warnf("%s is not a git repo, skipping", f.Name())
-			}
-		}
-		return nil
-	}
+	rs.scannerType = typeRepoScanner
 
-	return runHelper(NewRepo(m))
+	go rs.receiveLeaks()
+
+	return rs
 }
 
-func runHelper(r *Repo) error {
-	// Ignore allowlisted repos
-	for _, allowListedRepo := range r.Manager.Config.Allowlist.Repos {
-		if RegexMatched(r.Manager.Opts.RepoPath, allowListedRepo) {
-			return nil
+// Scan kicks of a repo scan
+func (rs *RepoScanner) Scan() (report.Report, error) {
+	var scannerReport report.Report
+	logOpts, err := logOptions(rs.repo, rs.opts)
+	if err != nil {
+		return scannerReport, err
+	}
+	cIter, err := rs.repo.Log(logOpts)
+	if err != nil {
+		return scannerReport, err
+	}
+	semaphore := make(chan bool, howManyThreads(rs.opts.Threads))
+	wg := sync.WaitGroup{}
+
+	err = cIter.ForEach(func(c *object.Commit) error {
+		if c == nil || depthReached(scannerReport.Commits, rs.opts) {
+			return storer.ErrStop
 		}
-		if RegexMatched(r.Manager.Opts.Repo, allowListedRepo) {
+
+		// Check if Commit is allowlisted
+		if isCommitAllowListed(c.Hash.String(), rs.cfg.Allowlist.Commits) {
 			return nil
 		}
-	}
-	if r.Manager.Opts.OpenLocal() {
-		r.Name = path.Base(r.Manager.Opts.RepoPath)
-		if err := r.Open(); err != nil {
-			return err
-		}
 
-		// Check if we are checking uncommitted files. This is the default behavior
-		// for a "$ gitleaks" command with no options set
-		if r.Manager.Opts.CheckUncommitted() {
-			if err := r.scanUncommitted(); err != nil {
+		// Check if at root
+		if len(c.ParentHashes) == 0 {
+			scannerReport.Commits++
+			facScanner := NewFilesAtCommitScanner(rs.BaseScanner, rs.repo, c)
+			facScanner.repoName = rs.repoName
+			facReport, err := facScanner.Scan()
+			if err != nil {
 				return err
 			}
+			scannerReport.Leaks = append(scannerReport.Leaks, facReport.Leaks...)
 			return nil
 		}
-	} else {
-		if err := r.Clone(nil); err != nil {
+
+		// inspect first parent only as all other parents will be eventually reached
+		// (they exist as the tip of other branches, etc)
+		// See https://github.com/zricethezav/gitleaks/issues/413 for details
+		parent, err := c.Parent(0)
+		if err != nil {
 			return err
 		}
-	}
-	return r.Scan()
-}
 
-// Clone will clone a repo and return a Repo struct which contains a go-git repo. The clone method
-// is determined by the clone options set in Manager.metadata.cloneOptions
-func (repo *Repo) Clone(cloneOption *git.CloneOptions) error {
-	var (
-		repository *git.Repository
-		err        error
-	)
-	if cloneOption == nil {
-		cloneOption = repo.Manager.CloneOptions
-	}
+		defer func() {
+			if err := recover(); err != nil {
+				// sometimes the Patch generation will fail due to a known bug in
+				// sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
+				// Once a fix has been merged I will remove this recover.
+				return
+			}
+		}()
 
-	log.Infof("cloning... %s", cloneOption.URL)
-	start := time.Now()
+		if parent == nil {
+			// shouldn't reach this point but just in case
+			return nil
+		}
 
-	if repo.Manager.CloneDir != "" {
-		clonePath := fmt.Sprintf("%s/%x", repo.Manager.CloneDir, md5.Sum([]byte(time.Now().String())))
-		repository, err = git.PlainClone(clonePath, false, cloneOption)
-	} else {
-		repository, err = git.Clone(memory.NewStorage(), nil, cloneOption)
-	}
-	if err != nil {
-		return err
-	}
-	repo.Name = filepath.Base(repo.Manager.Opts.Repo)
-	repo.Repository = repository
-	repo.Manager.RecordTime(manager.CloneTime(howLong(start)))
+		// start := time.Now()
+		patch, err := parent.Patch(c)
+		if err != nil {
+			log.Errorf("could not generate Patch")
+		}
 
-	return nil
-}
+		scannerReport.Commits++
+		wg.Add(1)
+		semaphore <- true
+		go func(c *object.Commit, patch *object.Patch) {
+			defer func() {
+				<-semaphore
+				wg.Done()
+			}()
 
-// howManyThreads will return a number 1-GOMAXPROCS which is the number
-// of goroutines that will spawn during gitleaks execution
-func howManyThreads(threads int) int {
-	maxThreads := runtime.GOMAXPROCS(0)
-	if threads == 0 {
-		return 1
-	} else if threads > maxThreads {
-		log.Warnf("%d threads set too high, setting to system max, %d", threads, maxThreads)
-		return maxThreads
-	}
-	return threads
-}
+			// patchContent is used for searching for leak line number
+			patchContent := patch.String()
 
-// getLogOptions determines what log options are used when iterating through commits.
-// It is similar to `git log {branch}`. Default behavior is to log ALL branches so
-// gitleaks gets the full git history.
-func getLogOptions(repo *Repo) (*git.LogOptions, error) {
-	var logOpts git.LogOptions
-	const dateformat string = "2006-01-02"
-	const timeformat string = "2006-01-02T15:04:05-0700"
-	if repo.Manager.Opts.CommitFrom != "" {
-		logOpts.From = plumbing.NewHash(repo.Manager.Opts.CommitFrom)
-	}
-	if repo.Manager.Opts.CommitSince != "" {
-		if t, err := time.Parse(timeformat, repo.Manager.Opts.CommitSince); err == nil {
-			logOpts.Since = &t
-		} else if t, err := time.Parse(dateformat, repo.Manager.Opts.CommitSince); err == nil {
-			logOpts.Since = &t
-		} else {
-			return nil, err
-		}
-	}
-	if repo.Manager.Opts.CommitUntil != "" {
-		if t, err := time.Parse(timeformat, repo.Manager.Opts.CommitUntil); err == nil {
-			logOpts.Until = &t
-		} else if t, err := time.Parse(dateformat, repo.Manager.Opts.CommitUntil); err == nil {
-			logOpts.Until = &t
-		} else {
-			return nil, err
-		}
-	}
-	if repo.Manager.Opts.Branch != "" {
-		refs, err := repo.Storer.IterReferences()
-		if err != nil {
-			return nil, err
-		}
-		err = refs.ForEach(func(ref *plumbing.Reference) error {
-			if ref.Name().IsTag() {
-				return nil
-			}
-			// check heads first
-			if ref.Name().String() == "refs/heads/"+repo.Manager.Opts.Branch {
-				logOpts = git.LogOptions{
-					From: ref.Hash(),
+			for _, f := range patch.FilePatches() {
+				if f.IsBinary() {
+					continue
 				}
-				return nil
-			} else if ref.Name().String() == "refs/remotes/origin/"+repo.Manager.Opts.Branch {
-				logOpts = git.LogOptions{
-					From: ref.Hash(),
+
+				for _, chunk := range f.Chunks() {
+					if chunk.Type() == fdiff.Add {
+						_, to := f.Files()
+						lineLookup := make(map[string]bool)
+						for _, leak := range checkRules(rs.BaseScanner, c, rs.repoName, to.Path(), chunk.Content()) {
+							leak.LineNumber = extractLine(patchContent, leak, lineLookup)
+							leak.LeakURL = leakURL(leak)
+							if rs.opts.Verbose {
+								logLeak(leak, rs.opts.Redact)
+							}
+							rs.leakWG.Add(1)
+							rs.leakChan <- leak
+						}
+					}
 				}
-				return nil
 			}
-			return nil
-		})
-		if err != nil {
-			return nil, err
-		}
-
-		if logOpts.From.IsZero() {
-			return nil, fmt.Errorf("could not find branch %s", repo.Manager.Opts.Branch)
-		}
-		return &logOpts, nil
-	}
-	if !logOpts.From.IsZero() || logOpts.Since != nil || logOpts.Until != nil {
-		return &logOpts, nil
-	}
-	return &git.LogOptions{All: true}, nil
-}
-
-// howLong accepts a time.Time object which is subtracted from time.Now() and
-// converted to nanoseconds which is returned
-func howLong(t time.Time) int64 {
-	return time.Now().Sub(t).Nanoseconds()
-}
+		}(c, patch)
 
-// Open opens a local repo either from repo-path or $PWD
-func (repo *Repo) Open() error {
-	if repo.Manager.Opts.RepoPath != "" {
-		// open git repo from repo path
-		repository, err := git.PlainOpen(repo.Manager.Opts.RepoPath)
-		if err != nil {
-			return err
-		}
-		repo.Repository = repository
-	} else {
-		// open git repo from PWD
-		dir, err := os.Getwd()
-		if err != nil {
-			return err
-		}
-		repository, err := git.PlainOpen(dir)
-		if err != nil {
-			return err
+		if c.Hash.String() == rs.opts.CommitTo {
+			return storer.ErrStop
 		}
-		repo.Repository = repository
-		repo.Name = path.Base(dir)
-	}
-	return nil
-}
-
-func (repo *Repo) loadRepoConfig() (config.Config, error) {
-	wt, err := repo.Repository.Worktree()
-	if err != nil {
-		return config.Config{}, err
-	}
-	var f billy.File
-	f, _ = wt.Filesystem.Open(".gitleaks.toml")
-	if f == nil {
-		f, err = wt.Filesystem.Open("gitleaks.toml")
-		if err != nil {
-			return config.Config{}, fmt.Errorf("problem loading repo config: %v", err)
-		}
-	}
-	defer f.Close()
-	var tomlLoader config.TomlLoader
-	_, err = toml.DecodeReader(f, &tomlLoader)
-	if err != nil {
-		return config.Config{}, err
-	}
-
-	return tomlLoader.Parse()
-}
+		return nil
+	})
 
-// timeoutReached returns true if the timeout deadline has been met. This function should be used
-// at the top of loops and before potentially long running goroutines (like checking inefficient regexes)
-func (repo *Repo) timeoutReached() bool {
-	if repo.ctx.Err() == context.DeadlineExceeded {
-		return true
-	}
-	return false
+	wg.Wait()
+	rs.leakWG.Wait()
+	scannerReport.Leaks = append(scannerReport.Leaks, rs.leaks...)
+	return scannerReport, nil
 }
 
-// setupTimeout parses the --timeout option and assigns a context with timeout to the manager
-// which will exit early if the timeout has been met.
-func (repo *Repo) setupTimeout() error {
-	if repo.Manager.Opts.Timeout == "" {
-		return nil
-	}
-	timeout, err := time.ParseDuration(repo.Manager.Opts.Timeout)
-	if err != nil {
-		return err
+func (rs *RepoScanner) receiveLeaks() {
+	for leak := range rs.leakChan {
+		rs.leaks = append(rs.leaks, leak)
+		rs.leakWG.Done()
 	}
-
-	repo.ctx, repo.cancel = context.WithTimeout(context.Background(), timeout)
-
-	go func() {
-		select {
-		case <-repo.ctx.Done():
-			if repo.timeoutReached() {
-				log.Warnf("Timeout deadline (%s) exceeded for %s", timeout.String(), repo.Name)
-			}
-		}
-	}()
-	return nil
 }

+ 0 - 400
scan/rule.go

@@ -1,400 +0,0 @@
-package scan
-
-import (
-	"bufio"
-	"fmt"
-	"io"
-	"math"
-	"path/filepath"
-	"regexp"
-	"strconv"
-	"strings"
-	"time"
-
-	"github.com/zricethezav/gitleaks/v6/config"
-	"github.com/zricethezav/gitleaks/v6/manager"
-
-	fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
-	"github.com/go-git/go-git/v5/plumbing/object"
-	log "github.com/sirupsen/logrus"
-)
-
-const (
-	diffAddPrefix           = "+"
-	diffAddFilePrefix       = "+++ b"
-	diffAddFilePrefixSlash  = "+++ b/"
-	diffLineSignature       = " @@"
-	diffLineSignaturePrefix = "@@ "
-	defaultLineNumber       = -1
-)
-
-// CheckRules accepts bundle and checks each rule defined in the config against the bundle's content.
-func (repo *Repo) CheckRules(bundle *Bundle) {
-	filename := filepath.Base(bundle.FilePath)
-	path := filepath.Dir(bundle.FilePath)
-
-	bundle.lineLookup = make(map[string]bool)
-
-	// We want to check if there is a allowlist for this file
-	if len(repo.config.Allowlist.Files) != 0 {
-		for _, reFileName := range repo.config.Allowlist.Files {
-			if RegexMatched(filename, reFileName) {
-				log.Debugf("allowlisted file found, skipping scan of file: %s", filename)
-				return
-			}
-		}
-	}
-
-	// We want to check if there is a allowlist for this path
-	if len(repo.config.Allowlist.Paths) != 0 {
-		for _, reFilePath := range repo.config.Allowlist.Paths {
-			if RegexMatched(path, reFilePath) {
-				log.Debugf("file in allowlisted path found, skipping scan of file: %s", filename)
-				return
-			}
-		}
-	}
-
-	for _, rule := range repo.config.Rules {
-		start := time.Now()
-
-		// For each rule we want to check filename allowlists
-		if isAllowListed(filename, rule.AllowList.Files) || isAllowListed(path, rule.AllowList.Paths) {
-			continue
-		}
-
-		// If it has fileNameRegex and it doesnt match we continue to next rule
-		if ruleContainFileRegex(rule) && !RegexMatched(filename, rule.File) {
-			continue
-		}
-
-		// If it has filePathRegex and it doesnt match we continue to next rule
-		if ruleContainPathRegex(rule) && !RegexMatched(path, rule.Path) {
-			continue
-		}
-
-		// If it doesnt contain a Content regex then it is a filename regex match
-		if !ruleContainRegex(rule) {
-			repo.Manager.SendLeaks(manager.Leak{
-				LineNumber: defaultLineNumber,
-				Line:       "N/A",
-				Offender:   "Filename/path offender: " + filename,
-				Commit:     bundle.Commit.Hash.String(),
-				Repo:       repo.Name,
-				Message:    bundle.Commit.Message,
-				Rule:       rule.Description,
-				Author:     bundle.Commit.Author.Name,
-				Email:      bundle.Commit.Author.Email,
-				Date:       bundle.Commit.Author.When,
-				Tags:       strings.Join(rule.Tags, ", "),
-				File:       filename,
-				Operation:  diffOpToString(bundle.Operation),
-			})
-		} else {
-			//otherwise we check if it matches Content regex
-			locs := rule.Regex.FindAllIndex([]byte(bundle.Content), -1)
-			if len(locs) != 0 {
-				for _, loc := range locs {
-					start := loc[0]
-					end := loc[1]
-					for start != 0 && bundle.Content[start] != '\n' {
-						start--
-					}
-
-					if bundle.Content[start] == '\n' {
-						start++
-					}
-
-					for end < len(bundle.Content)-1 && bundle.Content[end] != '\n' {
-						end++
-					}
-
-					line := bundle.Content[start:end]
-					offender := bundle.Content[loc[0]:loc[1]]
-					groups := rule.Regex.FindStringSubmatch(offender)
-
-					if isAllowListed(line, append(rule.AllowList.Regexes, repo.config.Allowlist.Regexes...)) {
-						continue
-					}
-
-					if len(rule.Entropies) != 0 && !trippedEntropy(groups, rule) {
-						continue
-					}
-
-					// 0 is a match for the full regex pattern
-					if 0 < rule.ReportGroup && rule.ReportGroup < len(groups) {
-						offender = groups[rule.ReportGroup]
-					}
-
-					leak := manager.Leak{
-						LineNumber: defaultLineNumber,
-						Line:       line,
-						Offender:   offender,
-						Commit:     bundle.Commit.Hash.String(),
-						Repo:       repo.Name,
-						Message:    bundle.Commit.Message,
-						Rule:       rule.Description,
-						Author:     bundle.Commit.Author.Name,
-						Email:      bundle.Commit.Author.Email,
-						Date:       bundle.Commit.Author.When,
-						Tags:       strings.Join(rule.Tags, ", "),
-						File:       bundle.FilePath,
-						Operation:  diffOpToString(bundle.Operation),
-					}
-
-					// only search for line numbers on non-deletions
-					if bundle.Operation != fdiff.Delete {
-						extractAndInjectLineNumber(&leak, bundle, repo)
-					}
-
-					repo.Manager.SendLeaks(leak)
-				}
-			}
-		}
-
-		repo.Manager.RecordTime(manager.RegexTime{
-			Time:  howLong(start),
-			Regex: rule.Regex.String(),
-		})
-	}
-}
-
-// RegexMatched matched an interface to a regular expression. The interface f can
-// be a string type or go-git *object.File type.
-func RegexMatched(f interface{}, re *regexp.Regexp) bool {
-	if re == nil {
-		return false
-	}
-	switch f.(type) {
-	case nil:
-		return false
-	case string:
-		if re.FindString(f.(string)) != "" {
-			return true
-		}
-		return false
-	case *object.File:
-		if re.FindString(f.(*object.File).Name) != "" {
-			return true
-		}
-		return false
-	}
-	return false
-}
-
-// diffOpToString converts a fdiff.Operation to a string
-func diffOpToString(operation fdiff.Operation) string {
-	switch operation {
-	case fdiff.Add:
-		return "addition"
-	case fdiff.Equal:
-		return "equal"
-	default:
-		return "deletion"
-	}
-}
-
-// extractAndInjectLine accepts a leak, bundle, and repo which it uses to do a reverse search in order to extract
-// the line number of a historic or present leak. The function is only called when the git operation is an addition
-// or none, it does not get called when the git operation is deletion.
-func extractAndInjectLineNumber(leak *manager.Leak, bundle *Bundle, repo *Repo) {
-	var err error
-
-	switch bundle.scanType {
-	case patchScan:
-		if bundle.Patch == "" {
-			return
-		}
-
-		// This is needed as some patches generate strings that are larger than
-		// scanners max size (MaxScanTokenSize = 64 * 1024)
-		// https://github.com/zricethezav/gitleaks/issues/413
-		buf := make([]byte, len(bundle.Patch))
-		scanner := bufio.NewScanner(strings.NewReader(bundle.Patch))
-		scanner.Buffer(buf, len(bundle.Patch))
-		scanner.Split(bufio.ScanLines)
-
-		currFile := ""
-		currLine := 0
-		currStartDiffLine := 0
-
-		for scanner.Scan() {
-			txt := scanner.Text()
-			if strings.HasPrefix(txt, diffAddFilePrefix) {
-				currStartDiffLine = 1
-				currLine = 0
-				currFile = strings.Split(txt, diffAddFilePrefixSlash)[1]
-
-				// next line contains diff line information so lets scan it here
-				scanner.Scan()
-
-				txt := scanner.Text()
-				i := strings.Index(txt, diffAddPrefix)
-				pairs := strings.Split(strings.Split(txt[i+1:], diffLineSignature)[0], ",")
-				currStartDiffLine, err = strconv.Atoi(pairs[0])
-				if err != nil {
-					log.Debug(err)
-					return
-				}
-				continue
-			} else if strings.HasPrefix(txt, diffAddPrefix) && strings.Contains(txt, leak.Line) && leak.File == currFile {
-				potentialLine := currLine + currStartDiffLine
-				if _, ok := bundle.lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, potentialLine, currFile)]; !ok {
-					bundle.lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, potentialLine, currFile)] = true
-					leak.LineNumber = potentialLine
-					return
-				}
-			} else if strings.HasPrefix(txt, diffLineSignaturePrefix) && currStartDiffLine != 0 {
-				// This logic is used for when there are multiple leaks of the same offender within the same patch
-				i := strings.Index(txt, diffAddPrefix)
-				pairs := strings.Split(strings.Split(txt[i+1:], diffLineSignature)[0], ",")
-				currStartDiffLine, err = strconv.Atoi(pairs[0])
-				if err != nil {
-					log.Debug(err)
-					return
-				}
-				currLine = 0
-				if !strings.HasSuffix(txt, diffLineSignature) {
-					currLine = -1
-				}
-			}
-			currLine++
-		}
-	case commitScan:
-		if bundle.Commit == nil {
-			return
-		}
-		f, err := bundle.Commit.File(bundle.FilePath)
-		if err != nil {
-			log.Error(err)
-			return
-		}
-		r, err := f.Reader()
-		if err != nil {
-			log.Error(err)
-			return
-		}
-		leak.LineNumber = extractLineHelper(r, bundle, leak)
-	case uncommittedScan:
-		wt, err := repo.Worktree()
-		if err != nil {
-			log.Error(err)
-			return
-		}
-		f, err := wt.Filesystem.Open(leak.File)
-		if err != nil {
-			log.Error(err)
-			return
-		}
-		leak.LineNumber = extractLineHelper(f, bundle, leak)
-	}
-}
-
-// extractLineHelper consolidates code for checking the leak line against the contents of a reader to find the
-// line number of the leak.
-func extractLineHelper(r io.Reader, bundle *Bundle, leak *manager.Leak) int {
-	scanner := bufio.NewScanner(r)
-	lineNumber := 1
-	for scanner.Scan() {
-		if leak.Line == scanner.Text() {
-			if _, ok := bundle.lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, lineNumber, bundle.FilePath)]; !ok {
-				bundle.lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, lineNumber, bundle.FilePath)] = true
-				return lineNumber
-			}
-		}
-		lineNumber++
-	}
-	return -1
-}
-
-// trippedEntropy checks if a given capture group or offender falls in between entropy ranges
-// supplied by a custom gitleaks configuration. Gitleaks do not check entropy by default.
-func trippedEntropy(groups []string, rule config.Rule) bool {
-	for _, e := range rule.Entropies {
-		if len(groups) > e.Group {
-			entropy := shannonEntropy(groups[e.Group])
-			if entropy >= e.Min && entropy <= e.Max {
-				return true
-			}
-		}
-	}
-	return false
-}
-
-// shannonEntropy calculates the entropy of data using the formula defined here:
-// https://en.wiktionary.org/wiki/Shannon_entropy
-// Another way to think about what this is doing is calculating the number of bits
-// needed to on average encode the data. So, the higher the entropy, the more random the data, the
-// more bits needed to encode that data.
-func shannonEntropy(data string) (entropy float64) {
-	if data == "" {
-		return 0
-	}
-
-	charCounts := make(map[rune]int)
-	for _, char := range data {
-		charCounts[char]++
-	}
-
-	invLength := 1.0 / float64(len(data))
-	for _, count := range charCounts {
-		freq := float64(count) * invLength
-		entropy -= freq * math.Log2(freq)
-	}
-
-	return entropy
-}
-
-// Checks if the given rule has a regex
-func ruleContainRegex(rule config.Rule) bool {
-	if rule.Regex == nil {
-		return false
-	}
-	if rule.Regex.String() == "" {
-		return false
-	}
-	return true
-}
-
-// Checks if the given rule has a file name regex
-func ruleContainFileRegex(rule config.Rule) bool {
-	if rule.File == nil {
-		return false
-	}
-	if rule.File.String() == "" {
-		return false
-	}
-	return true
-}
-
-// Checks if the given rule has a file path regex
-func ruleContainPathRegex(rule config.Rule) bool {
-	if rule.Path == nil {
-		return false
-	}
-	if rule.Path.String() == "" {
-		return false
-	}
-	return true
-}
-
-func isCommitAllowListed(commitHash string, allowlistedCommits []string) bool {
-	for _, hash := range allowlistedCommits {
-		if commitHash == hash {
-			return true
-		}
-	}
-	return false
-}
-
-func isAllowListed(target string, allowList []*regexp.Regexp) bool {
-	if len(allowList) != 0 {
-		for _, re := range allowList {
-			if re.FindString(target) != "" {
-				return true
-			}
-		}
-	}
-	return false
-
-}

+ 114 - 439
scan/scan.go

@@ -1,493 +1,168 @@
 package scan
 
 import (
-	"bufio"
-	"bytes"
-	"fmt"
-	"io"
 	"os"
-	"strings"
-	"sync"
-	"time"
-
-	"github.com/zricethezav/gitleaks/v6/manager"
+	"path/filepath"
 
 	"github.com/go-git/go-git/v5"
-	"github.com/go-git/go-git/v5/plumbing"
-	fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
-	"github.com/go-git/go-git/v5/plumbing/object"
-	"github.com/go-git/go-git/v5/plumbing/storer"
-	"github.com/sergi/go-diff/diffmatchpatch"
-	log "github.com/sirupsen/logrus"
+
+	"github.com/zricethezav/gitleaks/v7/config"
+	"github.com/zricethezav/gitleaks/v7/options"
+	"github.com/zricethezav/gitleaks/v7/report"
 )
 
-// Bundle contains various git information for scans.
-type Bundle struct {
-	Commit    *object.Commit
-	Patch     string
-	Content   string
-	FilePath  string
-	Operation fdiff.Operation
+// Scanner abstracts unique scanner internals while exposing the Scan function which
+// returns a report.
+type Scanner interface {
+	Scan() (report.Report, error)
+}
 
-	reader     io.Reader
-	lineLookup map[string]bool
-	scanType   int
+// BaseScanner is a container for common data each scanner needs.
+type BaseScanner struct {
+	opts        options.Options
+	cfg         config.Config
+	stopChan    chan os.Signal
+	scannerType ScannerType
 }
 
-// commitScanner is a function signature for scanning commits. There is some
-// redundant work needed by scanning all files at a commit (--files-at-commit=) and scanning
-// the patches generated by a commit (--commit=). The function scanCommit wraps that redundant work
-// and accepts a commitScanner for the different logic needed between the two cases described above.
-type commitScanner func(c *object.Commit, repo *Repo) error
+// ScannerType is the scanner type which is determined based on program arguments
+type ScannerType int
 
 const (
-	// We need to differentiate between scans as the logic for line searching is different between
-	// scanning patches, commits, and uncommitted files.
-	patchScan int = iota + 1
-	uncommittedScan
-	commitScan
+	typeRepoScanner ScannerType = iota + 1
+	typeDirScanner
+	typeCommitScanner
+	typeCommitsScanner
+	typeUnstagedScanner
+	typeFilesAtCommitScanner
+	typeNoGitScanner
+	typeEmpty
 )
 
-// Scan is responsible for scanning the entire history (default behavior) of a
-// git repo. Options that can change the behavior of this function include: --Commit, --depth, --branch.
-// See options/options.go for an explanation on these options.
-func (repo *Repo) Scan() error {
-	if err := repo.setupTimeout(); err != nil {
-		return err
-	}
-	if repo.cancel != nil {
-		defer repo.cancel()
-	}
-
-	if repo.Repository == nil {
-		return fmt.Errorf("%s repo is empty", repo.Name)
-	}
-
-	// load up alternative config if possible, if not use manager's config
-	if repo.Manager.Opts.RepoConfig {
-		cfg, err := repo.loadRepoConfig()
-		if err != nil {
-			return err
+// NewScanner accepts options and a config which will be used to determine and create a
+// new scanner which is then returned.
+func NewScanner(opts options.Options, cfg config.Config) (Scanner, error) {
+	var (
+		repo *git.Repository
+		err  error
+	)
+	// TODO move this block to config parsing?
+	for _, allowListedRepo := range cfg.Allowlist.Repos {
+		if regexMatched(opts.Path, allowListedRepo) {
+			return nil, nil
 		}
-		repo.config = cfg
-	}
-
-	scanTimeStart := time.Now()
-
-	// See https://github.com/zricethezav/gitleaks/issues/326
-	// Scan commit patches, all files at a commit, or a range of commits
-	if repo.Manager.Opts.Commit != "" {
-		return scanCommit(repo.Manager.Opts.Commit, repo, scanCommitPatches)
-	} else if repo.Manager.Opts.FilesAtCommit != "" {
-		return scanCommit(repo.Manager.Opts.FilesAtCommit, repo, scanFilesAtCommit)
-	} else if repo.Manager.Opts.Commits != "" {
-		commits := strings.Split(repo.Manager.Opts.Commits, ",")
-		for _, c := range commits {
-			err := scanCommit(c, repo, scanCommitPatches)
-			if err != nil {
-				return err
-			}
+		if regexMatched(opts.RepoURL, allowListedRepo) {
+			return nil, nil
 		}
-		return nil
-	} else if repo.Manager.Opts.CommitsFile != "" {
-		file, err := os.Open(repo.Manager.Opts.CommitsFile)
-		if err != nil {
-			return err
-		}
-		defer file.Close()
-
-		scanner := bufio.NewScanner(file)
-		for scanner.Scan() {
-			err := scanCommit(scanner.Text(), repo, scanCommitPatches)
-			if err != nil {
-				return err
-			}
-		}
-		return nil
+	}
+	base := BaseScanner{
+		opts: opts,
+		cfg:  cfg,
 	}
 
-	logOpts, err := getLogOptions(repo)
+	// We want to return a dir scanner immediately since if the scan type is a directory scan
+	// we don't want to clone/open a repo until inside ParentScanner.Scan
+	st, err := scanType(opts)
 	if err != nil {
-		return err
+		return nil, err
 	}
-	cIter, err := repo.Log(logOpts)
-	if err != nil {
-		return err
+	if st == typeDirScanner {
+		return NewParentScanner(base), nil
 	}
 
-	cc := 0
-	semaphore := make(chan bool, howManyThreads(repo.Manager.Opts.Threads))
-	wg := sync.WaitGroup{}
-	err = cIter.ForEach(func(c *object.Commit) error {
-		if c == nil || repo.timeoutReached() || repo.depthReached(cc) {
-			return storer.ErrStop
-		}
-
-		// Check if Commit is allowlisted
-		if isCommitAllowListed(c.Hash.String(), repo.config.Allowlist.Commits) {
-			return nil
-		}
-
-		// Check if at root
-		if len(c.ParentHashes) == 0 {
-			cc++
-			err = scanFilesAtCommit(c, repo)
-			if err != nil {
-				return err
-			}
-			return nil
-		}
-
-		// increase Commit counter
-		cc++
-
-		// inspect first parent only as all other parents will be eventually reached
-		// (they exist as the tip of other branches, etc)
-		// See https://github.com/zricethezav/gitleaks/issues/413 for details
-		parent, err := c.Parent(0)
+	// Clone or open a repo if we need it
+	if needsRepo(st) {
+		repo, err = getRepo(base.opts)
 		if err != nil {
-			return err
-		}
-
-		defer func() {
-			if err := recover(); err != nil {
-				// sometimes the Patch generation will fail due to a known bug in
-				// sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
-				// Once a fix has been merged I will remove this recover.
-				return
-			}
-		}()
-		if repo.timeoutReached() {
-			return nil
-		}
-		if parent == nil {
-			// shouldn't reach this point but just in case
-			return nil
+			return nil, err
 		}
+	}
 
-		start := time.Now()
-		patch, err := parent.Patch(c)
+	// load up alternative config if possible, if not use manager's config
+	if opts.RepoConfigPath != "" {
+		base.cfg, err = config.LoadRepoConfig(repo, opts.RepoConfigPath)
 		if err != nil {
-			log.Errorf("could not generate Patch")
+			return nil, err
 		}
-		repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
-
-		wg.Add(1)
-		semaphore <- true
-		go func(c *object.Commit, patch *object.Patch) {
-			defer func() {
-				<-semaphore
-				wg.Done()
-			}()
-			scanPatch(patch, c, repo)
-		}(c, patch)
-
-		if c.Hash.String() == repo.Manager.Opts.CommitTo {
-			return storer.ErrStop
-		}
-		return nil
-	})
-
-	wg.Wait()
-	repo.Manager.RecordTime(manager.ScanTime(howLong(scanTimeStart)))
-	repo.Manager.IncrementCommits(cc)
-	return nil
-}
-
-// scanEmpty scans an empty repo without any commits. See https://github.com/zricethezav/gitleaks/issues/352
-func (repo *Repo) scanEmpty() error {
-	scanTimeStart := time.Now()
-	wt, err := repo.Worktree()
-	if err != nil {
-		return err
 	}
 
-	status, err := wt.Status()
-	if err != nil {
-		return err
-	}
-	for fn := range status {
-		workTreeBuf := bytes.NewBuffer(nil)
-		workTreeFile, err := wt.Filesystem.Open(fn)
+	switch st {
+	case typeCommitScanner:
+		c, err := obtainCommit(repo, opts.Commit)
 		if err != nil {
-			continue
+			return nil, err
 		}
-		if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
-			return err
-		}
-		repo.CheckRules(&Bundle{
-			Content:  workTreeBuf.String(),
-			FilePath: workTreeFile.Name(),
-			Commit:   emptyCommit(),
-			scanType: uncommittedScan,
-		})
-	}
-	repo.Manager.RecordTime(manager.ScanTime(howLong(scanTimeStart)))
-	return nil
-}
-
-// scanUncommitted will do a `git diff` and scan changed files that are being tracked. This is useful functionality
-// for a pre-Commit hook so you can make sure your code does not have any leaks before committing.
-func (repo *Repo) scanUncommitted() error {
-	// load up alternative config if possible, if not use manager's config
-	if repo.Manager.Opts.RepoConfig {
-		cfg, err := repo.loadRepoConfig()
+		return NewCommitScanner(base, repo, c), nil
+	case typeCommitsScanner:
+		commits, err := optsToCommits(opts)
 		if err != nil {
-			return err
+			return nil, err
 		}
-		repo.config = cfg
-	}
-
-	if err := repo.setupTimeout(); err != nil {
-		return err
-	}
-
-	r, err := repo.Head()
-	if err == plumbing.ErrReferenceNotFound {
-		// possibly an empty repo, or maybe its not, either way lets scan all the files in the directory
-		return repo.scanEmpty()
-	} else if err != nil {
-		return err
-	}
-
-	scanTimeStart := time.Now()
-
-	c, err := repo.CommitObject(r.Hash())
-	if err != nil {
-		return err
-	}
-	// Staged change so the Commit details do not yet exist. Insert empty defaults.
-	c.Hash = plumbing.Hash{}
-	c.Message = "***STAGED CHANGES***"
-	c.Author.Name = ""
-	c.Author.Email = ""
-	c.Author.When = time.Unix(0, 0).UTC()
-
-	prevTree, err := c.Tree()
-	if err != nil {
-		return err
-	}
-	wt, err := repo.Worktree()
-	if err != nil {
-		return err
+		return NewCommitsScanner(base, repo, commits), nil
+	case typeFilesAtCommitScanner:
+		c, err := obtainCommit(repo, opts.FilesAtCommit)
+		if err != nil {
+			return nil, err
+		}
+		return NewFilesAtCommitScanner(base, repo, c), nil
+	case typeUnstagedScanner:
+		return NewUnstagedScanner(base, repo), nil
+	case typeDirScanner:
+		return NewParentScanner(base), nil
+	case typeNoGitScanner:
+		return NewNoGitScanner(base), nil
+	default:
+		return NewRepoScanner(base, repo), nil
 	}
+}
 
-	status, err := wt.Status()
-	for fn, state := range status {
-		var (
-			prevFileContents string
-			currFileContents string
-			filename         string
-		)
-
-		if state.Staging != git.Untracked {
-			if state.Staging == git.Deleted {
-				// file in staging has been deleted, aka it is not on the filesystem
-				// so the contents of the file are ""
-				currFileContents = ""
-			} else {
-				workTreeBuf := bytes.NewBuffer(nil)
-				workTreeFile, err := wt.Filesystem.Open(fn)
-				if err != nil {
-					continue
-				}
-				if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
-					return err
-				}
-				currFileContents = workTreeBuf.String()
-				filename = workTreeFile.Name()
-			}
-
-			// get files at HEAD state
-			prevFile, err := prevTree.File(fn)
-			if err != nil {
-				prevFileContents = ""
-
-			} else {
-				prevFileContents, err = prevFile.Contents()
-				if err != nil {
-					return err
-				}
-				if filename == "" {
-					filename = prevFile.Name
-				}
-			}
-
-			dmp := diffmatchpatch.New()
-			diffs := dmp.DiffCleanupSemantic(dmp.DiffMain(prevFileContents, currFileContents, false))
-			var diffContents string
-			for _, d := range diffs {
-				if d.Type == diffmatchpatch.DiffInsert {
-					diffContents += fmt.Sprintf("%s\n", d.Text)
-				}
-			}
-			repo.CheckRules(&Bundle{
-				Content:  diffContents,
-				FilePath: filename,
-				Commit:   c,
-				scanType: uncommittedScan,
-			})
-		}
+func scanType(opts options.Options) (ScannerType, error) {
+	//if opts.OwnerPath != "" {
+	//	return typeDirScanner
+	//}
+	if opts.Commit != "" {
+		return typeCommitScanner, nil
 	}
-
-	if err != nil {
-		return err
+	if opts.Commits != "" || opts.CommitsFile != "" {
+		return typeCommitsScanner, nil
 	}
-	repo.Manager.RecordTime(manager.ScanTime(howLong(scanTimeStart)))
-	return nil
-}
-
-// scan accepts a Patch, Commit, and repo. If the patches contains files that are
-// binary, then gitleaks will skip scanning that file OR if a file is matched on
-// allowlisted files set in the configuration. If a global rule for files is defined and a filename
-// matches said global rule, then a leak is sent to the manager.
-// After that, file chunks are created which are then inspected by InspectString()
-func scanPatch(patch *object.Patch, c *object.Commit, repo *Repo) {
-	bundle := Bundle{
-		Commit:   c,
-		Patch:    patch.String(),
-		scanType: patchScan,
+	if opts.FilesAtCommit != "" {
+		return typeFilesAtCommitScanner, nil
 	}
-	for _, f := range patch.FilePatches() {
-		if repo.timeoutReached() {
-			return
-		}
-		if f.IsBinary() {
-			continue
-		}
-		for _, chunk := range f.Chunks() {
-			if chunk.Type() == fdiff.Add || (repo.Manager.Opts.Deletion && chunk.Type() == fdiff.Delete) {
-				bundle.Content = chunk.Content()
-				bundle.Operation = chunk.Type()
-
-				// get filepath
-				from, to := f.Files()
-				if from != nil {
-					bundle.FilePath = from.Path()
-				} else if to != nil {
-					bundle.FilePath = to.Path()
-				} else {
-					bundle.FilePath = "???"
-				}
-				repo.CheckRules(&bundle)
-			}
+	if opts.Path != "" && !opts.NoGit {
+		if opts.CheckUncommitted() {
+			return typeUnstagedScanner, nil
 		}
-	}
-}
-
-// scanCommit accepts a Commit hash, repo, and commit scanning function. A new Commit
-// object will be created from the hash which will be passed into either scanCommitPatches
-// or scanFilesAtCommit depending on the options set.
-func scanCommit(commit string, repo *Repo, f commitScanner) error {
-	if commit == "latest" {
-		ref, err := repo.Repository.Head()
+		_, err := os.Stat(filepath.Join(opts.Path))
 		if err != nil {
-			return err
+			return typeEmpty, err
 		}
-		commit = ref.Hash().String()
-	}
-	repo.Manager.IncrementCommits(1)
-	h := plumbing.NewHash(commit)
-	c, err := repo.CommitObject(h)
-	if err != nil {
-		return err
-	}
-	return f(c, repo)
-}
-
-// scanCommitPatches accepts a Commit object and a repo. This function is only called when the --Commit=
-// option has been set. That option tells gitleaks to look only at a single Commit and check the contents
-// of said Commit. Similar to scan(), if the files contained in the Commit are a binaries or if they are
-// allowlisted then those files will be skipped.
-func scanCommitPatches(c *object.Commit, repo *Repo) error {
-	if len(c.ParentHashes) == 0 {
-		err := scanFilesAtCommit(c, repo)
-		if err != nil {
-			return err
+		// check if path/.git exists, if it does, this is a repo scan
+		// if not this is a multi-repo scan
+		_, err = os.Stat(filepath.Join(opts.Path, ".git"))
+		if os.IsNotExist(err) {
+			return typeDirScanner, nil
 		}
+		return typeRepoScanner, nil
 	}
-
-	return c.Parents().ForEach(func(parent *object.Commit) error {
-		defer func() {
-			if err := recover(); err != nil {
-				// sometimes the Patch generation will fail due to a known bug in
-				// sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
-				// Once a fix has been merged I will remove this recover.
-				return
-			}
-		}()
-		if repo.timeoutReached() {
-			return nil
-		}
-		if parent == nil {
-			return nil
-		}
-		start := time.Now()
-		patch, err := parent.Patch(c)
+	if opts.Path != "" && opts.NoGit {
+		_, err := os.Stat(filepath.Join(opts.Path))
 		if err != nil {
-			return fmt.Errorf("could not generate Patch")
+			return typeEmpty, err
 		}
-		repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
-
-		scanPatch(patch, c, repo)
-
-		return nil
-	})
-}
-
-// scanFilesAtCommit accepts a Commit object and a repo. This function is only called when the --files-at-Commit=
-// option has been set. That option tells gitleaks to look only at ALL the files at a Commit and check the contents
-// of said Commit. Similar to scan(), if the files contained in the Commit are a binaries or if they are
-// allowlisted then those files will be skipped.
-func scanFilesAtCommit(c *object.Commit, repo *Repo) error {
-	fIter, err := c.Files()
-	if err != nil {
-		return err
+		return typeNoGitScanner, nil
+	}
+	if opts.CheckUncommitted() {
+		return typeUnstagedScanner, nil
 	}
 
-	err = fIter.ForEach(func(f *object.File) error {
-		bin, err := f.IsBinary()
-		if bin || repo.timeoutReached() {
-			return nil
-		} else if err != nil {
-			return err
-		}
-
-		content, err := f.Contents()
-		if err != nil {
-			return err
-		}
-
-		repo.CheckRules(&Bundle{
-			Content:   content,
-			FilePath:  f.Name,
-			Commit:    c,
-			scanType:  commitScan,
-			Operation: fdiff.Add,
-		})
-		return nil
-	})
-	return err
+	// default to the most commonly used scanner, RepoScanner
+	return typeRepoScanner, nil
 }
 
-// depthReached checks if i meets the depth (--depth=) if set
-func (repo *Repo) depthReached(i int) bool {
-	if repo.Manager.Opts.Depth != 0 && repo.Manager.Opts.Depth == i {
-		log.Warnf("Exceeded depth limit (%d)", i)
+func needsRepo(st ScannerType) bool {
+	if !(st == typeDirScanner || st == typeNoGitScanner) {
 		return true
 	}
 	return false
 }
-
-// emptyCommit generates an empty commit used for scanning uncommitted changes
-func emptyCommit() *object.Commit {
-	return &object.Commit{
-		Hash:    plumbing.Hash{},
-		Message: "***STAGED CHANGES***",
-		Author: object.Signature{
-			Name:  "",
-			Email: "",
-			When:  time.Unix(0, 0).UTC(),
-		},
-	}
-}

+ 151 - 118
scan/scan_test.go

@@ -10,9 +10,10 @@ import (
 	"sort"
 	"testing"
 
-	"github.com/zricethezav/gitleaks/v6/config"
-	"github.com/zricethezav/gitleaks/v6/manager"
-	"github.com/zricethezav/gitleaks/v6/options"
+	"github.com/zricethezav/gitleaks/v7/report"
+
+	"github.com/zricethezav/gitleaks/v7/config"
+	"github.com/zricethezav/gitleaks/v7/options"
 
 	"github.com/sergi/go-diff/diffmatchpatch"
 )
@@ -27,13 +28,14 @@ func TestScan(t *testing.T) {
 		opts        options.Options
 		wantPath    string
 		wantErr     error
+		wantScanErr error
 		emptyRepo   bool
 		wantEmpty   bool
 	}{
 		{
 			description: "test local repo one aws leak",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_1",
+				Path:         "../test_data/test_repos/test_repo_1",
 				Report:       "../test_data/test_local_repo_one_aws_leak.json.got",
 				ReportFormat: "json",
 			},
@@ -43,7 +45,7 @@ func TestScan(t *testing.T) {
 			description: "test local repo one aws leak threaded",
 			opts: options.Options{
 				Threads:      runtime.GOMAXPROCS(0),
-				RepoPath:     "../test_data/test_repos/test_repo_1",
+				Path:         "../test_data/test_repos/test_repo_1",
 				Report:       "../test_data/test_local_repo_one_aws_leak.json.got",
 				ReportFormat: "json",
 			},
@@ -52,24 +54,25 @@ func TestScan(t *testing.T) {
 		{
 			description: "test non existent repo",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/no_repo_here",
+				Path:         "../test_data/test_repos/no_repo_here",
 				ReportFormat: "json",
 			},
+			wantErr:   fmt.Errorf("stat ../test_data/test_repos/no_repo_here: no such file or directory"),
 			emptyRepo: true,
 		},
 		{
 			description: "test local repo one aws leak allowlisted",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_1",
+				Path:         "../test_data/test_repos/test_repo_1",
 				ReportFormat: "json",
-				Config:       "../test_data/test_configs/aws_key_allowlist_python_files.toml",
+				ConfigPath:   "../test_data/test_configs/aws_key_allowlist_python_files.toml",
 			},
 			wantEmpty: true,
 		},
 		{
 			description: "test local repo two leaks",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_2",
+				Path:         "../test_data/test_repos/test_repo_2",
 				Report:       "../test_data/test_local_repo_two_leaks.json.got",
 				ReportFormat: "json",
 			},
@@ -78,7 +81,7 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo two leaks from Commit",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_2",
+				Path:         "../test_data/test_repos/test_repo_2",
 				Report:       "../test_data/test_local_repo_two_leaks_commit_from.json.got",
 				ReportFormat: "json",
 				CommitFrom:   "996865bb912f3bc45898a370a13aadb315014b55",
@@ -88,7 +91,7 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo two leaks to Commit",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_2",
+				Path:         "../test_data/test_repos/test_repo_2",
 				Report:       "../test_data/test_local_repo_two_leaks_commit_to.json.got",
 				ReportFormat: "json",
 				CommitTo:     "996865bb912f3bc45898a370a13aadb315014b55",
@@ -98,7 +101,7 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo two leaks to from Commit",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_2",
+				Path:         "../test_data/test_repos/test_repo_2",
 				Report:       "../test_data/test_local_repo_two_leaks_commit_to_from.json.got",
 				ReportFormat: "json",
 				CommitFrom:   "d8ac0b73aeeb45843319cdc5ce506516eb49bf7a",
@@ -109,7 +112,7 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo two leaks list Commits",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_2",
+				Path:         "../test_data/test_repos/test_repo_2",
 				Report:       "../test_data/test_local_repo_two_leaks_commit_range.json.got",
 				ReportFormat: "json",
 				Commits:      "d8ac0b73aeeb45843319cdc5ce506516eb49bf7a,996865bb912f3bc45898a370a13aadb315014b55,17471a5fda722a9e423f1a0d3f0d267ea009d41c,51f6dcf6b89b93f4075ba92c400b075631a6cc93,b10b3e2cb320a8c211fda94c4567299d37de7776",
@@ -119,7 +122,7 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo two leaks file list commits",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_2",
+				Path:         "../test_data/test_repos/test_repo_2",
 				Report:       "../test_data/test_local_repo_two_leaks_file_commit_range.json.got",
 				ReportFormat: "json",
 				CommitsFile:  "../test_data/test_options/test_local_repo_commits.txt",
@@ -129,8 +132,8 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo two leaks globally allowlisted",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_2",
-				Config:       "../test_data/test_configs/aws_key_global_allowlist_file.toml",
+				Path:         "../test_data/test_repos/test_repo_2",
+				ConfigPath:   "../test_data/test_configs/aws_key_global_allowlist_file.toml",
 				ReportFormat: "json",
 			},
 			wantEmpty: true,
@@ -138,8 +141,8 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo two leaks allowlisted",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_2",
-				Config:       "../test_data/test_configs/aws_key_allowlist_files.toml",
+				Path:         "../test_data/test_repos/test_repo_2",
+				ConfigPath:   "../test_data/test_configs/aws_key_allowlist_files.toml",
 				ReportFormat: "json",
 			},
 			wantEmpty: true,
@@ -147,9 +150,9 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo three leaks dev branch with reportGroup set",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_3",
+				Path:         "../test_data/test_repos/test_repo_3",
 				Report:       "../test_data/test_local_repo_three_leaks_with_report_groups.json.got",
-				Config:       "../test_data/test_configs/aws_key_with_report_groups.toml",
+				ConfigPath:   "../test_data/test_configs/aws_key_with_report_groups.toml",
 				Branch:       "dev",
 				ReportFormat: "json",
 			},
@@ -158,9 +161,9 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo three leaks dev branch",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_3",
+				Path:         "../test_data/test_repos/test_repo_3",
 				Report:       "../test_data/test_local_repo_three_leaks.json.got",
-				Config:       "../test_data/test_configs/aws_key.toml",
+				ConfigPath:   "../test_data/test_configs/aws_key.toml",
 				Branch:       "dev",
 				ReportFormat: "json",
 			},
@@ -169,16 +172,16 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo branch does not exist",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_3",
+				Path:         "../test_data/test_repos/test_repo_3",
 				Branch:       "nobranch",
 				ReportFormat: "json",
 			},
-			wantEmpty: true,
+			wantScanErr: fmt.Errorf("could not find branch nobranch"),
 		},
 		{
 			description: "test local repo one aws leak single Commit",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_1",
+				Path:         "../test_data/test_repos/test_repo_1",
 				Report:       "../test_data/test_local_repo_one_aws_leak_commit.json.got",
 				Commit:       "6557c92612d3b35979bd426d429255b3bf9fab74",
 				ReportFormat: "json",
@@ -188,9 +191,9 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo one aws leak AND leak on python files",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_1",
+				Path:         "../test_data/test_repos/test_repo_1",
 				Report:       "../test_data/test_local_repo_one_aws_leak_and_file_leak.json.got",
-				Config:       "../test_data/test_configs/aws_key_file_regex.toml",
+				ConfigPath:   "../test_data/test_configs/aws_key_file_regex.toml",
 				ReportFormat: "json",
 			},
 			wantPath: "../test_data/test_local_repo_one_aws_leak_and_file_leak.json",
@@ -198,7 +201,7 @@ func TestScan(t *testing.T) {
 		{
 			description: "test owner path",
 			opts: options.Options{
-				OwnerPath:    "../test_data/test_repos/",
+				Path:         "../test_data/test_repos/",
 				Report:       "../test_data/test_local_owner_aws_leak.json.got",
 				ReportFormat: "json",
 			},
@@ -207,19 +210,19 @@ func TestScan(t *testing.T) {
 		{
 			description: "test owner path allowlist repo",
 			opts: options.Options{
-				OwnerPath:    "../test_data/test_repos/",
+				Path:         "../test_data/test_repos/",
 				Report:       "../test_data/test_local_owner_aws_leak_allowlist_repo.json.got",
 				ReportFormat: "json",
-				Config:       "../test_data/test_configs/aws_key_local_owner_allowlist_repo.toml",
+				ConfigPath:   "../test_data/test_configs/aws_key_local_owner_allowlist_repo.toml",
 			},
 			wantPath: "../test_data/test_local_owner_aws_leak_allowlist_repo.json",
 		},
 		{
 			description: "test entropy and regex",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_1",
+				Path:         "../test_data/test_repos/test_repo_1",
 				Report:       "../test_data/test_regex_entropy.json.got",
-				Config:       "../test_data/test_configs/regex_entropy.toml",
+				ConfigPath:   "../test_data/test_configs/regex_entropy.toml",
 				ReportFormat: "json",
 			},
 			wantPath: "../test_data/test_regex_entropy.json",
@@ -227,47 +230,27 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo four entropy alternative config",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_4",
-				Report:       "../test_data/test_local_repo_four_alt_config_entropy.json.got",
-				RepoConfig:   true,
-				ReportFormat: "json",
+				Path:           "../test_data/test_repos/test_repo_4",
+				Report:         "../test_data/test_local_repo_four_alt_config_entropy.json.got",
+				RepoConfigPath: "gitleaks.toml",
+				ReportFormat:   "json",
 			},
 			wantPath: "../test_data/test_local_repo_four_alt_config_entropy.json",
 		},
 		{
 			description: "test local repo four entropy alternative config",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_1",
+				Path:         "../test_data/test_repos/test_repo_1",
 				Report:       "../test_data/test_regex_allowlist.json.got",
-				Config:       "../test_data/test_configs/aws_key_aws_allowlisted.toml",
+				ConfigPath:   "../test_data/test_configs/aws_key_aws_allowlisted.toml",
 				ReportFormat: "json",
 			},
 			wantEmpty: true,
 		},
-		{
-			description: "test local repo one aws leak timeout",
-			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_1",
-				Report:       "../test_data/test_local_repo_one_aws_leak.json.got",
-				ReportFormat: "json",
-				Timeout:      "10ns",
-			},
-			wantEmpty: true,
-		},
-		{
-			description: "test local repo one aws leak long timeout",
-			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_1",
-				Report:       "../test_data/test_local_repo_one_aws_leak.json.got",
-				ReportFormat: "json",
-				Timeout:      "2m",
-			},
-			wantPath: "../test_data/test_local_repo_one_aws_leak.json",
-		},
 		{
 			description: "test owner path depth=2",
 			opts: options.Options{
-				OwnerPath:    "../test_data/test_repos/",
+				Path:         "../test_data/test_repos/",
 				Report:       "../test_data/test_local_owner_aws_leak_depth_2.json.got",
 				ReportFormat: "json",
 				Depth:        2,
@@ -277,7 +260,7 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo five files at Commit",
 			opts: options.Options{
-				RepoPath:      "../test_data/test_repos/test_repo_5",
+				Path:          "../test_data/test_repos/test_repo_5",
 				Report:        "../test_data/test_local_repo_five_files_at_commit.json.got",
 				FilesAtCommit: "a4c9fb737d5552fd96fce5cc7eedb23353ba9ed0",
 				ReportFormat:  "json",
@@ -287,7 +270,7 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo five files at latest Commit",
 			opts: options.Options{
-				RepoPath:      "../test_data/test_repos/test_repo_5",
+				Path:          "../test_data/test_repos/test_repo_5",
 				Report:        "../test_data/test_local_repo_five_files_at_latest_commit.json.got",
 				FilesAtCommit: "latest",
 				ReportFormat:  "json",
@@ -297,31 +280,31 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo five at Commit",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_5",
+				Path:         "../test_data/test_repos/test_repo_5",
 				Report:       "../test_data/test_local_repo_five_commit.json.got",
 				Commit:       "a4c9fb737d5552fd96fce5cc7eedb23353ba9ed0",
 				ReportFormat: "json",
-				Config:       "../test_data/test_configs/generic.toml",
+				ConfigPath:   "../test_data/test_configs/generic.toml",
 			},
 			wantPath: "../test_data/test_local_repo_five_commit.json",
 		},
 		{
 			description: "test local repo five at latest Commit",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_5",
+				Path:         "../test_data/test_repos/test_repo_5",
 				Report:       "../test_data/test_local_repo_five_at_latest_commit.json.got",
 				Commit:       "latest",
 				ReportFormat: "json",
-				Config:       "../test_data/test_configs/generic.toml",
+				ConfigPath:   "../test_data/test_configs/generic.toml",
 			},
 			wantPath: "../test_data/test_local_repo_five_at_latest_commit.json",
 		},
 		{
 			description: "test local repo six filename",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_6",
+				Path:         "../test_data/test_repos/test_repo_6",
 				Report:       "../test_data/test_local_repo_six_filename.json.got",
-				Config:       "../test_data/test_configs/regex_filename.toml",
+				ConfigPath:   "../test_data/test_configs/regex_filename.toml",
 				ReportFormat: "json",
 			},
 			wantPath: "../test_data/test_local_repo_six_filename.json",
@@ -329,9 +312,9 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo six filepath",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_6",
+				Path:         "../test_data/test_repos/test_repo_6",
 				Report:       "../test_data/test_local_repo_six_filepath.json.got",
-				Config:       "../test_data/test_configs/regex_filepath.toml",
+				ConfigPath:   "../test_data/test_configs/regex_filepath.toml",
 				ReportFormat: "json",
 			},
 			wantPath: "../test_data/test_local_repo_six_filepath.json",
@@ -339,9 +322,9 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo six filename and filepath",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_6",
+				Path:         "../test_data/test_repos/test_repo_6",
 				Report:       "../test_data/test_local_repo_six_filepath_filename.json.got",
-				Config:       "../test_data/test_configs/regex_filepath_filename.toml",
+				ConfigPath:   "../test_data/test_configs/regex_filepath_filename.toml",
 				ReportFormat: "json",
 			},
 			wantPath: "../test_data/test_local_repo_six_filepath_filename.json",
@@ -349,9 +332,9 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo six path globally allowlisted",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_6",
+				Path:         "../test_data/test_repos/test_repo_6",
 				Report:       "../test_data/test_local_repo_six_path_globally_allowlisted.json.got",
-				Config:       "../test_data/test_configs/aws_key_global_allowlist_path.toml",
+				ConfigPath:   "../test_data/test_configs/aws_key_global_allowlist_path.toml",
 				ReportFormat: "json",
 			},
 			wantPath: "../test_data/test_local_repo_six_path_globally_allowlisted.json",
@@ -359,7 +342,7 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo six leaks since date",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_6",
+				Path:         "../test_data/test_repos/test_repo_6",
 				Report:       "../test_data/test_local_repo_six_leaks_since_date.json.got",
 				ReportFormat: "json",
 				CommitSince:  "2019-10-25",
@@ -369,7 +352,7 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo two leaks until date",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_6",
+				Path:         "../test_data/test_repos/test_repo_6",
 				Report:       "../test_data/test_local_repo_six_leaks_until_date.json.got",
 				ReportFormat: "json",
 				CommitUntil:  "2019-10-25",
@@ -379,7 +362,7 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo four leaks timerange Commit",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_4",
+				Path:         "../test_data/test_repos/test_repo_4",
 				Report:       "../test_data/test_local_repo_four_leaks_commit_timerange.json.got",
 				ReportFormat: "json",
 				CommitSince:  "2019-10-25T13:01:27-0400",
@@ -390,27 +373,17 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo two allowlist Commit config",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_2",
+				Path:         "../test_data/test_repos/test_repo_2",
 				Report:       "../test_data/test_local_repo_two_allowlist_commits.json.got",
-				Config:       "../test_data/test_configs/allowlist_commit.toml",
+				ConfigPath:   "../test_data/test_configs/allowlist_commit.toml",
 				ReportFormat: "json",
 			},
 			wantPath: "../test_data/test_local_repo_two_allowlist_commits.json",
 		},
-		{
-			description: "test local repo two deletion",
-			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_2",
-				Report:       "../test_data/test_local_repo_two_leaks_deletion.json.got",
-				ReportFormat: "json",
-				Deletion:     true,
-			},
-			wantPath: "../test_data/test_local_repo_two_leaks_deletion.json",
-		},
 		{
 			description: "test local repo eight (merges)",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_8",
+				Path:         "../test_data/test_repos/test_repo_8",
 				Report:       "../test_data/test_local_repo_eight.json.got",
 				ReportFormat: "json",
 			},
@@ -419,13 +392,33 @@ func TestScan(t *testing.T) {
 		{
 			description: "test local repo nine",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_9",
+				Path:         "../test_data/test_repos/test_repo_9",
 				Report:       "../test_data/test_local_repo_nine_aws_leak.json.got",
-				Config:       "../test_data/test_configs/large_with_global_allowlist_regex.toml",
+				ConfigPath:   "../test_data/test_configs/large_with_global_allowlist_regex.toml",
 				ReportFormat: "json",
 			},
 			wantPath: "../test_data/test_local_repo_nine_aws_leak.json",
 		},
+		{
+			description: "test dir one no git",
+			opts: options.Options{
+				Path:         "../test_data/test_repos/test_dir_1",
+				Report:       "../test_data/test_dir1_aws_leak.json.got",
+				ReportFormat: "json",
+				NoGit:        true,
+			},
+			wantPath: "../test_data/test_dir1_aws_leak.json",
+		},
+		{
+			description: "test file with leak no git",
+			opts: options.Options{
+				Path:         "../test_data/test_repos/test_dir_1/server.test.py",
+				Report:       "../test_data/test_file1_aws_leak.json.got",
+				ReportFormat: "json",
+				NoGit:        true,
+			},
+			wantPath: "../test_data/test_file1_aws_leak.json",
+		},
 	}
 
 	for _, test := range tests {
@@ -435,31 +428,43 @@ func TestScan(t *testing.T) {
 			t.Error(err)
 		}
 
-		m, err := manager.NewManager(test.opts, cfg)
+		scanner, err := NewScanner(test.opts, cfg)
+		if test.wantErr != nil {
+			if err == nil {
+				t.Fatalf("did not receive wantErr: %v", test.wantErr)
+			}
+			if err.Error() != test.wantErr.Error() {
+				t.Fatalf("wantErr does not equal err received: %v", err.Error())
+			}
+			continue
+		}
 		if err != nil {
-			t.Error(err)
+			t.Fatal(err)
 		}
 
-		err = Run(m)
+		scannerReport, err := scanner.Scan()
 
-		if test.wantErr != nil {
+		if test.wantScanErr != nil {
 			if err == nil {
-				t.Errorf("did not receive wantErr: %v", test.wantErr)
+				t.Fatalf("did not receive wantErr: %v", test.wantScanErr)
 			}
-			if err.Error() != test.wantErr.Error() {
-				t.Errorf("wantErr does not equal err received: %v", err.Error())
+			if err.Error() != test.wantScanErr.Error() {
+				t.Fatalf("wantErr does not equal err received: %v", err.Error())
 			}
 			continue
 		}
+		if err != nil {
+			t.Fatal(err)
+		}
 
-		err = m.Report()
+		err = report.WriteReport(scannerReport, test.opts, cfg)
 		if err != nil {
 			t.Error(err)
 		}
 
 		if test.wantEmpty {
-			if len(m.GetLeaks()) != 0 {
-				t.Errorf("wanted no leaks but got some instead: %+v", m.GetLeaks())
+			if len(scannerReport.Leaks) != 0 {
+				t.Errorf("wanted no leaks but got some instead: %+v", scannerReport.Leaks)
 			}
 			continue
 		}
@@ -473,6 +478,7 @@ func TestScan(t *testing.T) {
 	}
 }
 
+//
 func TestScanUncommited(t *testing.T) {
 	moveDotGit("dotGit", ".git")
 	defer moveDotGit(".git", "dotGit")
@@ -481,6 +487,7 @@ func TestScanUncommited(t *testing.T) {
 		opts         options.Options
 		wantPath     string
 		wantErr      error
+		wantScanErr  error
 		emptyRepo    bool
 		wantEmpty    bool
 		fileToChange string
@@ -489,9 +496,9 @@ func TestScanUncommited(t *testing.T) {
 		{
 			description: "test scan local one leak",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_1",
+				Path:         "../test_data/test_repos/test_repo_1",
 				Report:       "../test_data/test_local_repo_one_aws_leak_uncommitted.json.got",
-				Uncommited:   true,
+				Unstaged:     true,
 				ReportFormat: "json",
 			},
 			wantPath:     "../test_data/test_local_repo_one_aws_leak_uncommitted.json",
@@ -501,8 +508,8 @@ func TestScanUncommited(t *testing.T) {
 		{
 			description: "test scan local no leak",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_1",
-				Uncommited:   true,
+				Path:         "../test_data/test_repos/test_repo_1",
+				Unstaged:     true,
 				ReportFormat: "json",
 			},
 			wantEmpty:    true,
@@ -512,9 +519,9 @@ func TestScanUncommited(t *testing.T) {
 		{
 			description: "test scan repo with no commits",
 			opts: options.Options{
-				RepoPath:     "../test_data/test_repos/test_repo_7",
+				Path:         "../test_data/test_repos/test_repo_7",
 				Report:       "../test_data/test_local_repo_seven_aws_leak_uncommitted.json.got",
-				Uncommited:   true,
+				Unstaged:     true,
 				ReportFormat: "json",
 			},
 			wantPath: "../test_data/test_local_repo_seven_aws_leak_uncommitted.json",
@@ -527,11 +534,11 @@ func TestScanUncommited(t *testing.T) {
 		)
 		fmt.Println(test.description)
 		if test.fileToChange != "" {
-			old, err = ioutil.ReadFile(fmt.Sprintf("%s/%s", test.opts.RepoPath, test.fileToChange))
+			old, err = ioutil.ReadFile(fmt.Sprintf("%s/%s", test.opts.Path, test.fileToChange))
 			if err != nil {
 				t.Error(err)
 			}
-			altered, err := os.OpenFile(fmt.Sprintf("%s/%s", test.opts.RepoPath, test.fileToChange),
+			altered, err := os.OpenFile(fmt.Sprintf("%s/%s", test.opts.Path, test.fileToChange),
 				os.O_WRONLY|os.O_APPEND, 0644)
 			if err != nil {
 				t.Error(err)
@@ -548,21 +555,42 @@ func TestScanUncommited(t *testing.T) {
 		if err != nil {
 			t.Error(err)
 		}
-		m, err := manager.NewManager(test.opts, cfg)
+		scanner, err := NewScanner(test.opts, cfg)
+		if test.wantErr != nil {
+			if err == nil {
+				t.Fatalf("did not receive wantErr: %v", test.wantErr)
+			}
+			if err.Error() != test.wantErr.Error() {
+				t.Fatalf("wantErr does not equal err received: %v", err.Error())
+			}
+			continue
+		}
 		if err != nil {
-			t.Error(err)
+			t.Fatal(err)
 		}
 
-		if err := Run(m); err != nil {
-			t.Error(err)
+		scannerReport, err := scanner.Scan()
+
+		if test.wantScanErr != nil {
+			if err == nil {
+				t.Fatalf("did not receive wantErr: %v", test.wantScanErr)
+			}
+			if err.Error() != test.wantScanErr.Error() {
+				t.Fatalf("wantErr does not equal err received: %v", err.Error())
+			}
+			continue
+		}
+		if err != nil {
+			t.Fatal(err)
 		}
 
-		if err := m.Report(); err != nil {
+		err = report.WriteReport(scannerReport, test.opts, cfg)
+		if err != nil {
 			t.Error(err)
 		}
 
 		if test.fileToChange != "" {
-			err = ioutil.WriteFile(fmt.Sprintf("%s/%s", test.opts.RepoPath, test.fileToChange), old, 0)
+			err = ioutil.WriteFile(fmt.Sprintf("%s/%s", test.opts.Path, test.fileToChange), old, 0)
 			if err != nil {
 				t.Error(err)
 			}
@@ -583,8 +611,8 @@ func TestScanUncommited(t *testing.T) {
 
 func fileCheck(wantPath, gotPath string) error {
 	var (
-		gotLeaks  []manager.Leak
-		wantLeaks []manager.Leak
+		gotLeaks  []report.Leak
+		wantLeaks []report.Leak
 	)
 	want, err := ioutil.ReadFile(wantPath)
 	if err != nil {
@@ -603,7 +631,7 @@ func fileCheck(wantPath, gotPath string) error {
 
 	err = json.Unmarshal(want, &wantLeaks)
 	if err != nil {
-		return nil
+		return err
 	}
 
 	sort.Slice(gotLeaks, func(i, j int) bool { return (gotLeaks)[i].Commit < (gotLeaks)[j].Commit })
@@ -629,6 +657,11 @@ func moveDotGit(from, to string) error {
 		if !dir.IsDir() {
 			continue
 		}
+		_, err := os.Stat(fmt.Sprintf("%s/%s/%s", testRepoBase, dir.Name(), from))
+		if os.IsNotExist(err) {
+			continue
+		}
+
 		err = os.Rename(fmt.Sprintf("%s/%s/%s", testRepoBase, dir.Name(), from),
 			fmt.Sprintf("%s/%s/%s", testRepoBase, dir.Name(), to))
 		if err != nil {

+ 190 - 0
scan/unstaged.go

@@ -0,0 +1,190 @@
+package scan
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"strings"
+	"time"
+
+	"github.com/zricethezav/gitleaks/v7/report"
+
+	"github.com/go-git/go-git/v5"
+	"github.com/go-git/go-git/v5/plumbing"
+	"github.com/sergi/go-diff/diffmatchpatch"
+)
+
+// UnstagedScanner scans changes in a repository that have not been committed yet.
+// This is the scanner used when you don't provide program arguments, in which case
+// your PWD is scanned.
+type UnstagedScanner struct {
+	BaseScanner
+	// repo is the git repository whose HEAD and worktree status Scan inspects.
+	repo     *git.Repository
+	// repoName is handed to checkRules for every scanned file; NewUnstagedScanner
+	// derives it with getRepoName.
+	repoName string
+}
+
+// NewUnstagedScanner builds an UnstagedScanner around the given base scanner and
+// repository. The repository name is derived from base.opts via getRepoName and
+// the scanner type is set to typeUnstagedScanner.
+func NewUnstagedScanner(base BaseScanner, repo *git.Repository) *UnstagedScanner {
+	scanner := new(UnstagedScanner)
+	scanner.BaseScanner = base
+	scanner.repo = repo
+	scanner.repoName = getRepoName(base.opts)
+	scanner.scannerType = typeUnstagedScanner
+	return scanner
+}
+
+// Scan kicks off an unstaged scan. This will attempt to determine unstaged changes which are then scanned.
+//
+// When HEAD cannot be resolved (plumbing.ErrReferenceNotFound, e.g. a repository with no commits yet),
+// the full contents of every file reported by the worktree status are scanned. Otherwise each file in
+// the status whose staging state is not git.Untracked is diffed against its contents at HEAD and only
+// the inserted text is scanned.
+//
+// It returns the accumulated report together with the first error raised by go-git or while reading a
+// worktree file. Files that cannot be opened are skipped.
+func (us *UnstagedScanner) Scan() (report.Report, error) {
+	var scannerReport report.Report
+	r, err := us.repo.Head()
+	if err == plumbing.ErrReferenceNotFound {
+		wt, err := us.repo.Worktree()
+		if err != nil {
+			return scannerReport, err
+		}
+
+		status, err := wt.Status()
+		if err != nil {
+			return scannerReport, err
+		}
+		for fn := range status {
+			workTreeBuf := bytes.NewBuffer(nil)
+			workTreeFile, err := wt.Filesystem.Open(fn)
+			if err != nil {
+				continue
+			}
+			_, err = io.Copy(workTreeBuf, workTreeFile)
+			workTreeName := workTreeFile.Name()
+			// Close immediately instead of deferring: a defer inside this loop would keep
+			// every file open until Scan returns. The file was only read, so a close
+			// error carries no information worth failing the scan for.
+			_ = workTreeFile.Close()
+			if err != nil {
+				return scannerReport, err
+			}
+			leaks := checkRules(us.BaseScanner, emptyCommit(), us.repoName, workTreeName, workTreeBuf.String())
+			for _, leak := range leaks {
+				if us.opts.Verbose {
+					logLeak(leak, us.opts.Redact)
+				}
+				scannerReport.Leaks = append(scannerReport.Leaks, leak)
+			}
+		}
+		return scannerReport, nil
+	} else if err != nil {
+		return scannerReport, err
+	}
+
+	c, err := us.repo.CommitObject(r.Hash())
+	if err != nil {
+		return scannerReport, err
+	}
+
+	// Staged change so the Commit details do not yet exist. Insert empty defaults.
+	c.Hash = plumbing.Hash{}
+	c.Message = ""
+	c.Author.Name = ""
+	c.Author.Email = ""
+	c.Author.When = time.Unix(0, 0).UTC()
+
+	prevTree, err := c.Tree()
+	if err != nil {
+		return scannerReport, err
+	}
+	wt, err := us.repo.Worktree()
+	if err != nil {
+		return scannerReport, err
+	}
+
+	status, err := wt.Status()
+	if err != nil {
+		return scannerReport, err
+	}
+	for fn, state := range status {
+		if state.Staging == git.Untracked {
+			continue
+		}
+
+		var (
+			prevFileContents string
+			currFileContents string
+			filename         string
+		)
+
+		// A file deleted in staging is not on the filesystem, so its current
+		// contents stay "" and there is nothing to read.
+		if state.Staging != git.Deleted {
+			workTreeBuf := bytes.NewBuffer(nil)
+			workTreeFile, err := wt.Filesystem.Open(fn)
+			if err != nil {
+				continue
+			}
+			_, err = io.Copy(workTreeBuf, workTreeFile)
+			filename = workTreeFile.Name()
+			// Close per iteration so handles do not pile up across the loop.
+			_ = workTreeFile.Close()
+			if err != nil {
+				return scannerReport, err
+			}
+			currFileContents = workTreeBuf.String()
+		}
+
+		// Get the file at HEAD state. If the lookup fails, the previous contents
+		// stay "" and the whole current file counts as inserted text.
+		prevFile, err := prevTree.File(fn)
+		if err == nil {
+			prevFileContents, err = prevFile.Contents()
+			if err != nil {
+				return scannerReport, err
+			}
+			if filename == "" {
+				filename = prevFile.Name
+			}
+		}
+
+		dmp := diffmatchpatch.New()
+		diffs := dmp.DiffMain(prevFileContents, currFileContents, false)
+		prettyDiff := diffPrettyText(diffs)
+
+		// Only inserted segments are handed to the rules, one segment per line.
+		var inserted strings.Builder
+		for _, d := range diffs {
+			if d.Type == diffmatchpatch.DiffInsert {
+				inserted.WriteString(d.Text)
+				inserted.WriteString("\n")
+			}
+		}
+		leaks := checkRules(us.BaseScanner, c, us.repoName, filename, inserted.String())
+		if len(leaks) == 0 {
+			continue
+		}
+
+		// Split once per file rather than once per leak.
+		prettyLines := strings.Split(prettyDiff, "\n")
+
+		// lineLookup remembers which (offender, line, line number, file) tuples were
+		// already reported so that identical leaks resolve to distinct lines.
+		//
+		// NOTE(review): only lines of prettyDiff that begin with diffAddPrefix are
+		// considered, and diffPrettyText emits the prefix once per inserted segment.
+		// A leak that does not sit on such a line is never appended to the report.
+		// Confirm this is intended.
+		lineLookup := make(map[string]bool)
+		for _, leak := range leaks {
+			for lineNumber, line := range prettyLines {
+				if !strings.HasPrefix(line, diffAddPrefix) || !strings.Contains(line, leak.Line) {
+					continue
+				}
+				key := fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, lineNumber, leak.File)
+				if lineLookup[key] {
+					continue
+				}
+				lineLookup[key] = true
+				leak.LineNumber = lineNumber + 1
+				if us.opts.Verbose {
+					logLeak(leak, us.opts.Redact)
+				}
+				scannerReport.Leaks = append(scannerReport.Leaks, leak)
+				break
+			}
+		}
+	}
+
+	return scannerReport, nil
+}
+
+// diffPrettyText renders a []Diff as plain text: inserted segments are preceded
+// by "+", deleted segments by "-", and equal segments are written unchanged.
+// Segments of any other type are omitted.
+// TODO open PR for this
+func diffPrettyText(diffs []diffmatchpatch.Diff) string {
+	var out strings.Builder
+	for _, segment := range diffs {
+		var marker string
+		switch segment.Type {
+		case diffmatchpatch.DiffInsert:
+			marker = "+"
+		case diffmatchpatch.DiffDelete:
+			marker = "-"
+		case diffmatchpatch.DiffEqual:
+			marker = ""
+		default:
+			continue
+		}
+		out.WriteString(marker)
+		out.WriteString(segment.Text)
+	}
+	return out.String()
+}

+ 510 - 0
scan/utils.go

@@ -0,0 +1,510 @@
+package scan
+
+import (
+	"bufio"
+	"encoding/json"
+	"fmt"
+	"math"
+	"os"
+	"path/filepath"
+	"regexp"
+	"runtime"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/zricethezav/gitleaks/v7/report"
+
+	"github.com/zricethezav/gitleaks/v7/config"
+	"github.com/zricethezav/gitleaks/v7/options"
+
+	"github.com/go-git/go-git/v5"
+	"github.com/go-git/go-git/v5/plumbing"
+	"github.com/go-git/go-git/v5/plumbing/object"
+	"github.com/go-git/go-git/v5/storage/memory"
+	log "github.com/sirupsen/logrus"
+)
+
+const (
+	// diffAddPrefix is the marker that starts an added line in diff/patch text.
+	diffAddPrefix     = "+"
+	// diffDelPrefix is the marker that starts a removed line in patch text.
+	diffDelPrefix     = "-"
+	// diffLineSignature is used by extractLine to cut the trailing " @@" off a
+	// hunk header when parsing the hunk's starting line number.
+	diffLineSignature = " @@"
+	// defaultLineNumber is reported when no better line number is known
+	// (filename/path leaks, or when extractLine finds no matching line).
+	defaultLineNumber = 1
+
+	// maxLineLen is the length threshold used by limitLen when truncating
+	// strings stored in a leak.
+	maxLineLen = 200
+)
+
+// obtainCommit looks up the commit object for commitSha in repo. The special
+// value "latest" is resolved to the hash that HEAD currently points at.
+func obtainCommit(repo *git.Repository, commitSha string) (*object.Commit, error) {
+	sha := commitSha
+	if sha == "latest" {
+		head, err := repo.Head()
+		if err != nil {
+			return nil, err
+		}
+		sha = head.Hash().String()
+	}
+	hash := plumbing.NewHash(sha)
+	return repo.CommitObject(hash)
+}
+
+// getRepoName picks a name for the scan target. It is the base name of
+// opts.RepoURL if set, otherwise the base name of opts.Path if set, otherwise
+// the base name of the working directory when uncommitted changes are being
+// checked. It returns "" when none of these apply.
+func getRepoName(opts options.Options) string {
+	switch {
+	case opts.RepoURL != "":
+		return filepath.Base(opts.RepoURL)
+	case opts.Path != "":
+		return filepath.Base(opts.Path)
+	case opts.CheckUncommitted():
+		// a Getwd failure is ignored; filepath.Base("") then yields "."
+		cwd, _ := os.Getwd()
+		return filepath.Base(cwd)
+	default:
+		return ""
+	}
+}
+
+// getRepo returns the repository to scan. It opens opts.Path when a local
+// repo is requested, opens the working directory when uncommitted changes
+// are being checked, and otherwise clones via cloneRepo.
+func getRepo(opts options.Options) (*git.Repository, error) {
+	switch {
+	case opts.OpenLocal():
+		if opts.Path == "" {
+			log.Info("opening .")
+		} else {
+			log.Infof("opening %s\n", opts.Path)
+		}
+		return git.PlainOpen(opts.Path)
+	case opts.CheckUncommitted():
+		// open git repo from PWD
+		cwd, err := os.Getwd()
+		if err != nil {
+			return nil, err
+		}
+		log.Debugf("opening %s as a repo\n", cwd)
+		return git.PlainOpen(cwd)
+	}
+	return cloneRepo(opts)
+}
+
+// cloneRepo clones the repository described by opts.CloneOptions(). The clone
+// goes to disk at opts.ClonePath when that is set, otherwise into in-memory
+// storage.
+func cloneRepo(opts options.Options) (*git.Repository, error) {
+	cloneOpts, err := opts.CloneOptions()
+	if err != nil {
+		return nil, err
+	}
+	if opts.ClonePath == "" {
+		log.Infof("cloning... %s", cloneOpts.URL)
+		return git.Clone(memory.NewStorage(), nil, cloneOpts)
+	}
+	log.Infof("cloning... %s to %s", cloneOpts.URL, opts.ClonePath)
+	return git.PlainClone(opts.ClonePath, false, cloneOpts)
+}
+
+// depthReached reports whether i equals the configured depth (--depth=).
+// A depth of 0 means no limit, so it is never reached.
+func depthReached(i int, opts options.Options) bool {
+	if opts.Depth == 0 || opts.Depth != i {
+		return false
+	}
+	log.Warnf("Exceeded depth limit (%d)", i)
+	return true
+}
+
+// emptyCommit builds the placeholder commit used when scanning uncommitted
+// changes: zero hash, empty message and author, and the Unix epoch (UTC) as
+// the author time.
+func emptyCommit() *object.Commit {
+	epoch := time.Unix(0, 0).UTC()
+	// every field not listed here keeps its zero value (zero hash, "" strings)
+	return &object.Commit{
+		Author: object.Signature{When: epoch},
+	}
+}
+
+// howManyThreads will return a number 1-GOMAXPROCS which is the number
+// of goroutines that will spawn during gitleaks execution.
+//
+// A requested value of zero or less falls back to 1, and a value above
+// GOMAXPROCS is capped (with a warning) to GOMAXPROCS.
+func howManyThreads(threads int) int {
+	// GOMAXPROCS(0) only queries the current setting, it does not change it
+	maxThreads := runtime.GOMAXPROCS(0)
+	if threads <= 0 {
+		// negative values used to be returned unchanged, breaking the 1..max contract
+		return 1
+	}
+	if threads > maxThreads {
+		log.Warnf("%d threads set too high, setting to system max, %d", threads, maxThreads)
+		return maxThreads
+	}
+	return threads
+}
+
+// shouldLog reports whether checkRules itself should print a leak. It is true
+// only when verbose output is on and the scanner is none of the repo, commit,
+// unstaged or no-git scanner types.
+func shouldLog(scanner BaseScanner) bool {
+	if !scanner.opts.Verbose {
+		return false
+	}
+	kind := scanner.scannerType
+	excluded := kind == typeRepoScanner ||
+		kind == typeCommitScanner ||
+		kind == typeUnstagedScanner ||
+		kind == typeNoGitScanner
+	return !excluded
+}
+
+// checkRules applies every configured rule to a single file and returns the
+// leaks that were found.
+//
+// scanner supplies the configuration and options, commit supplies the
+// metadata copied into each leak, repoName and filePath identify the file,
+// and content is the text that is scanned line by line.
+//
+// The work happens in two passes:
+//  1. Each rule is filtered by its commit allowlist and by skipRule. A rule
+//     that survives and has no content regex is a filename/path rule and
+//     produces one leak at defaultLineNumber. Rules with a content regex are
+//     collected for the second pass.
+//  2. Every line of content is matched against the collected rules, honouring
+//     rule-level and global regex allowlists, entropy ranges and reportGroup.
+//
+// Nothing is scanned when the file is covered by the global file/path
+// allowlists (skipCheck).
+func checkRules(scanner BaseScanner, commit *object.Commit, repoName, filePath, content string) []report.Leak {
+	filename := filepath.Base(filePath)
+	path := filepath.Dir(filePath)
+	commitHash := commit.Hash.String()
+	var leaks []report.Leak
+
+	// First do simple checks based on the global file and path allowlists
+	if skipCheck(scanner.cfg, filename, path) {
+		return leaks
+	}
+
+	// newLeak fills in the fields shared by filename leaks and content leaks
+	newLeak := func(rule config.Rule, lineNumber int, line, offender string) report.Leak {
+		leak := report.Leak{
+			LineNumber: lineNumber,
+			Line:       limitLen(line),
+			Offender:   limitLen(offender),
+			Commit:     commitHash,
+			Repo:       repoName,
+			RepoURL:    scanner.opts.RepoURL,
+			Message:    limitLen(commit.Message),
+			Rule:       rule.Description,
+			Author:     commit.Author.Name,
+			Email:      commit.Author.Email,
+			Date:       commit.Author.When,
+			Tags:       strings.Join(rule.Tags, ", "),
+			File:       filePath,
+		}
+		leak.LeakURL = leakURL(leak)
+		return leak
+	}
+
+	// record logs the leak when this scanner type logs from here, then stores it
+	record := func(leak report.Leak) {
+		if shouldLog(scanner) {
+			logLeak(leak, scanner.opts.Redact)
+		}
+		leaks = append(leaks, leak)
+	}
+
+	// Pass 1: filter the rules once per file instead of once per line. Rules
+	// are tracked by position, so two rules sharing a description can no
+	// longer suppress each other.
+	var contentRules []config.Rule
+	for _, rule := range scanner.cfg.Rules {
+		if isCommitAllowListed(commitHash, rule.AllowList.Commits) {
+			// only this rule is skipped; the remaining rules still apply
+			continue
+		}
+		if skipRule(rule, filename, filePath, commitHash) {
+			continue
+		}
+
+		// If it doesnt contain a Content regex then it is a filename regex match
+		if !ruleContainRegex(rule) {
+			record(newLeak(rule, defaultLineNumber, "", "Filename/path offender: "+filename))
+			continue
+		}
+		contentRules = append(contentRules, rule)
+	}
+
+	// Pass 2: match each line against the rules that have a content regex.
+	// Rules with a nil regex never reach this loop, so FindString cannot be
+	// called on a nil *regexp.Regexp.
+	for idx, line := range strings.Split(content, "\n") {
+		for _, rule := range contentRules {
+			offender := rule.Regex.FindString(line)
+			if offender == "" {
+				continue
+			}
+
+			// The two allowlists are checked separately rather than appended
+			// together, so the rule's slice is never written to.
+			if isAllowListed(line, rule.AllowList.Regexes) || isAllowListed(line, scanner.cfg.Allowlist.Regexes) {
+				continue
+			}
+
+			// check entropy
+			groups := rule.Regex.FindStringSubmatch(offender)
+			if len(rule.Entropies) != 0 && !trippedEntropy(groups, rule) {
+				continue
+			}
+
+			// 0 is a match for the full regex pattern
+			if 0 < rule.ReportGroup && rule.ReportGroup < len(groups) {
+				offender = groups[rule.ReportGroup]
+			}
+
+			// line numbers are 1-based
+			record(newLeak(rule, idx+1, line, offender))
+		}
+	}
+	return leaks
+}
+
+// limitLen shortens str for reporting. Strings of at most maxLineLen bytes
+// are returned unchanged. Longer strings are cut to maxLineLen-1 bytes and
+// suffixed with "...". If that cut would land inside a multi-byte UTF-8
+// character, it is moved back to the start of that character so the result
+// stays valid UTF-8.
+func limitLen(str string) string {
+	if len(str) <= maxLineLen {
+		return str
+	}
+	cut := maxLineLen - 1
+	// A byte of the form 10xxxxxx is a UTF-8 continuation byte. If the first
+	// dropped byte is one, the cut splits a character. A character has at most
+	// three continuation bytes, so stepping back is bounded to three bytes.
+	for cut > maxLineLen-4 && cut > 0 && str[cut]&0xC0 == 0x80 {
+		cut--
+	}
+	return str[:cut] + "..."
+}
+
+func logLeak(leak report.Leak, redact bool) {
+	if redact {
+		leak = report.RedactLeak(leak)
+	}
+	var b []byte
+	b, _ = json.MarshalIndent(leak, "", "	")
+	fmt.Println(string(b))
+}
+
+// logOptions determines what log options are used when iterating through commits.
+// It is similar to `git log {branch}`. Default behavior is to log ALL branches so
+// gitleaks gets the full git history.
+//
+// CommitSince and CommitUntil accept either "2006-01-02T15:04:05-0700" or
+// "2006-01-02". When Branch is set, the returned options contain only that
+// branch's head hash as From.
+func logOptions(repo *git.Repository, opts options.Options) (*git.LogOptions, error) {
+	const dateformat string = "2006-01-02"
+	const timeformat string = "2006-01-02T15:04:05-0700"
+
+	// parseWhen tries the full timestamp layout first and the date-only layout
+	// second; the error returned is the one from the date-only attempt.
+	parseWhen := func(value string) (*time.Time, error) {
+		if stamp, err := time.Parse(timeformat, value); err == nil {
+			return &stamp, nil
+		}
+		day, err := time.Parse(dateformat, value)
+		if err != nil {
+			return nil, err
+		}
+		return &day, nil
+	}
+
+	var selected git.LogOptions
+	if opts.CommitFrom != "" {
+		selected.From = plumbing.NewHash(opts.CommitFrom)
+	}
+	if opts.CommitSince != "" {
+		since, err := parseWhen(opts.CommitSince)
+		if err != nil {
+			return nil, err
+		}
+		selected.Since = since
+		selected.All = true
+	}
+	if opts.CommitUntil != "" {
+		until, err := parseWhen(opts.CommitUntil)
+		if err != nil {
+			return nil, err
+		}
+		selected.Until = until
+		selected.All = true
+	}
+
+	if opts.Branch != "" {
+		ref, err := repo.Storer.Reference(plumbing.NewBranchReferenceName(opts.Branch))
+		if err != nil {
+			return nil, fmt.Errorf("could not find branch %s", opts.Branch)
+		}
+		// the branch selection replaces everything gathered above
+		branchOnly := git.LogOptions{From: ref.Hash()}
+		if branchOnly.From.IsZero() {
+			return nil, fmt.Errorf("could not find branch %s", opts.Branch)
+		}
+		return &branchOnly, nil
+	}
+
+	if selected.From.IsZero() && selected.Since == nil && selected.Until == nil {
+		// nothing usable was selected: walk all branches
+		return &git.LogOptions{All: true}, nil
+	}
+	return &selected, nil
+}
+
+// skipCheck reports whether a file is covered by the global allowlist: true
+// if filename matches any cfg.Allowlist.Files pattern or path matches any
+// cfg.Allowlist.Paths pattern.
+func skipCheck(cfg config.Config, filename string, path string) bool {
+	// file-name allowlist first
+	for _, fileRe := range cfg.Allowlist.Files {
+		if !regexMatched(filename, fileRe) {
+			continue
+		}
+		log.Debugf("allowlisted file found, skipping scan of file: %s", filename)
+		return true
+	}
+
+	// then the path allowlist
+	for _, pathRe := range cfg.Allowlist.Paths {
+		if !regexMatched(path, pathRe) {
+			continue
+		}
+		log.Debugf("file in allowlisted path found, skipping scan of file: %s", filename)
+		return true
+	}
+	return false
+}
+
+// skipRule reports whether rule should be skipped for a file. A rule is
+// skipped when the file is on the rule's own file or path allowlist, or when
+// the rule has a file-name or path regex that the file does not match.
+// commitSha is accepted but not used.
+func skipRule(rule config.Rule, filename, path, commitSha string) bool {
+	switch {
+	case isAllowListed(filename, rule.AllowList.Files):
+		// file name is allowlisted for this rule
+		return true
+	case isAllowListed(path, rule.AllowList.Paths):
+		// path is allowlisted for this rule
+		return true
+	case ruleContainFileRegex(rule) && !regexMatched(filename, rule.File):
+		// rule is restricted to file names this one does not match
+		return true
+	case ruleContainPathRegex(rule) && !regexMatched(path, rule.Path):
+		// rule is restricted to paths this one does not match
+		return true
+	}
+	return false
+}
+
+// regexMatched reports whether re finds a non-empty match in f. A nil re
+// never matches, and a pattern that only matches the empty string counts as
+// no match.
+func regexMatched(f string, re *regexp.Regexp) bool {
+	return re != nil && re.FindString(f) != ""
+}
+
+// trippedEntropy reports whether any entropy range configured on rule is hit.
+// For each range, the Shannon entropy of the capture group at index e.Group
+// is computed and compared against [e.Min, e.Max], both ends inclusive.
+// Ranges that refer to a group index not present in groups are ignored.
+func trippedEntropy(groups []string, rule config.Rule) bool {
+	for _, e := range rule.Entropies {
+		if e.Group >= len(groups) {
+			continue
+		}
+		score := shannonEntropy(groups[e.Group])
+		if score < e.Min || score > e.Max {
+			continue
+		}
+		return true
+	}
+	return false
+}
+
+// shannonEntropy calculates the entropy of data using the formula defined here:
+// https://en.wiktionary.org/wiki/Shannon_entropy
+// Another way to think about what this is doing is calculating the number of bits
+// needed to on average encode the data. So, the higher the entropy, the more random the data, the
+// more bits needed to encode that data.
+//
+// Characters are counted per Unicode code point and frequencies are taken
+// relative to the number of code points, so multi-byte characters are weighted
+// the same as ASCII ones. An empty string has entropy 0.
+func shannonEntropy(data string) (entropy float64) {
+	if data == "" {
+		return 0
+	}
+
+	charCounts := make(map[rune]int)
+	total := 0
+	for _, char := range data {
+		charCounts[char]++
+		total++
+	}
+
+	// normalise by the rune count: len(data) is a byte count and would make
+	// the frequencies of non-ASCII input sum to less than 1
+	invLength := 1.0 / float64(total)
+	for _, count := range charCounts {
+		freq := float64(count) * invLength
+		entropy -= freq * math.Log2(freq)
+	}
+
+	return entropy
+}
+
+// ruleContainRegex reports whether rule has a content regex, i.e. a non-nil
+// Regex whose pattern is not the empty string.
+func ruleContainRegex(rule config.Rule) bool {
+	return rule.Regex != nil && rule.Regex.String() != ""
+}
+
+// ruleContainFileRegex reports whether rule has a file name regex, i.e. a
+// non-nil File whose pattern is not the empty string.
+func ruleContainFileRegex(rule config.Rule) bool {
+	return rule.File != nil && rule.File.String() != ""
+}
+
+// ruleContainPathRegex reports whether rule has a file path regex, i.e. a
+// non-nil Path whose pattern is not the empty string.
+func ruleContainPathRegex(rule config.Rule) bool {
+	return rule.Path != nil && rule.Path.String() != ""
+}
+
+// isCommitAllowListed reports whether commitHash is exactly equal to one of
+// the hashes in allowlistedCommits.
+func isCommitAllowListed(commitHash string, allowlistedCommits []string) bool {
+	for i := 0; i < len(allowlistedCommits); i++ {
+		if allowlistedCommits[i] != commitHash {
+			continue
+		}
+		return true
+	}
+	return false
+}
+
+// isAllowListed reports whether any regex in allowList finds a non-empty
+// match in target. An empty or nil allowList never matches.
+func isAllowListed(target string, allowList []*regexp.Regexp) bool {
+	for idx := range allowList {
+		if allowList[idx].FindString(target) == "" {
+			continue
+		}
+		return true
+	}
+	return false
+}
+
+// optsToCommits returns the list of commit hashes selected by opts.
+//
+// When opts.Commits is set it is split on commas. Otherwise the file at
+// opts.CommitsFile is read and each line becomes one entry. An error is
+// returned when the file cannot be opened or cannot be read to the end.
+func optsToCommits(opts options.Options) ([]string, error) {
+	if opts.Commits != "" {
+		return strings.Split(opts.Commits, ","), nil
+	}
+	file, err := os.Open(opts.CommitsFile)
+	if err != nil {
+		return []string{}, err
+	}
+	defer file.Close()
+
+	scanner := bufio.NewScanner(file)
+	var commits []string
+	for scanner.Scan() {
+		commits = append(commits, scanner.Text())
+	}
+	// Scan also returns false on a read error or an over-long line; surface
+	// that instead of silently returning a truncated list
+	if err := scanner.Err(); err != nil {
+		return []string{}, err
+	}
+	return commits, nil
+}
+
+// extractLine works out the line number of leak.Line inside the new version
+// of leak.File by walking the file's section of patchContent.
+//
+// lineLookup remembers which (offender, line, line number, file) combinations
+// have already been returned, so repeated calls for identical leaks in the
+// same file yield successive occurrences. The map is updated in place.
+//
+// It returns defaultLineNumber when no unseen added line containing
+// leak.Line is found.
+func extractLine(patchContent string, leak report.Leak, lineLookup map[string]bool) int {
+	// Locate this file's section via its "+++ b/<file>" header. When the
+	// header is missing i is -1, so the slice below starts at 0 and the whole
+	// patch is used.
+	i := strings.Index(patchContent, fmt.Sprintf("\n+++ b/%s", leak.File))
+	filePatchContent := patchContent[i+1:]
+	// The section ends where the next file's "diff --git" header begins.
+	i = strings.Index(filePatchContent, "diff --git")
+	if i != -1 {
+		filePatchContent = filePatchContent[:i]
+	}
+	chunkStartLine := 0
+	currLine := 0
+	for _, patchLine := range strings.Split(filePatchContent, "\n") {
+		if strings.HasPrefix(patchLine, "@@") {
+			// Hunk header: take the text after the first "+", drop the
+			// trailing " @@..." part, and keep the number before the comma.
+			// A failed Atoi leaves chunkStartLine at 0.
+			i := strings.Index(patchLine, diffAddPrefix)
+			pairs := strings.Split(strings.Split(patchLine[i+1:], diffLineSignature)[0], ",")
+			chunkStartLine, _ = strconv.Atoi(pairs[0])
+			// -1 so that the increment at the end of this iteration leaves
+			// offset 0 for the first line of the hunk body
+			currLine = -1
+		}
+		if strings.HasPrefix(patchLine, diffDelPrefix) {
+			// removed lines do not advance the offset; this cancels the
+			// increment at the bottom of the loop
+			currLine--
+		}
+		// NOTE(review): the "+++ b/<file>" header line also starts with "+",
+		// and strings.Contains is always true for an empty leak.Line — confirm
+		// callers never pass a leak without a Line.
+		if strings.HasPrefix(patchLine, diffAddPrefix) && strings.Contains(patchLine, leak.Line) {
+			lineNumber := chunkStartLine + currLine
+			if _, ok := lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, lineNumber, leak.File)]; !ok {
+				lineLookup[fmt.Sprintf("%s%s%d%s", leak.Offender, leak.Line, lineNumber, leak.File)] = true
+				return lineNumber
+			}
+		}
+		currLine++
+	}
+	return defaultLineNumber
+}
+
+// leakURL builds a "<repoURL>/blob/<commit>/<file>#L<line>" link for leak.
+// It returns "" when the leak has no RepoURL.
+func leakURL(leak report.Leak) string {
+	if leak.RepoURL == "" {
+		return ""
+	}
+	return fmt.Sprintf("%s/blob/%s/%s#L%d", leak.RepoURL, leak.Commit, leak.File, leak.LineNumber)
+}

+ 4 - 2
test_data/test_configs/allowlist_commit.toml

@@ -1,12 +1,14 @@
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]
+    [rules.allowlist]
+		description = "if we encounter this commit for this rule, skip it"
+        commits = ["b10b3e2cb320a8c211fda94c4567299d37de7776"]
 
 
 [allowlist]
   commits = [
-    "b10b3e2cb320a8c211fda94c4567299d37de7776",
     "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
     "996865bb912f3bc45898a370a13aadb315014b55"
   ]

+ 1 - 1
test_data/test_configs/aws_key.toml

@@ -4,6 +4,6 @@
 	tags = ["key", "AWS"]
 
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]

+ 1 - 1
test_data/test_configs/aws_key_allowlist_files.toml

@@ -1,5 +1,5 @@
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]
         [rules.allowlist]

+ 1 - 1
test_data/test_configs/aws_key_allowlist_python_files.toml

@@ -1,5 +1,5 @@
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]
         [rules.allowlist]

+ 1 - 1
test_data/test_configs/aws_key_aws_allowlisted.toml

@@ -1,5 +1,5 @@
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]
         [rules.allowlist]

+ 1 - 1
test_data/test_configs/aws_key_file_regex.toml

@@ -4,7 +4,7 @@
 	tags = ["key", "AWS"]
 
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]
 

+ 1 - 1
test_data/test_configs/aws_key_global_allowlist_file.toml

@@ -1,5 +1,5 @@
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]
 

+ 1 - 1
test_data/test_configs/aws_key_global_allowlist_path.toml

@@ -1,5 +1,5 @@
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]
 

+ 1 - 1
test_data/test_configs/aws_key_local_owner_allowlist_repo.toml

@@ -1,5 +1,5 @@
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]
     [allowlist]

+ 1 - 1
test_data/test_configs/aws_key_with_report_groups.toml

@@ -4,7 +4,7 @@
 	tags = ["key", "AWS"]
 
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''AWS secret: ("?)((A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16})("?)'''
     tags = ["key", "AWS"]
     reportGroup = 2

+ 1 - 1
test_data/test_configs/bad_aws_key.toml

@@ -4,6 +4,6 @@
 	tags = ["key", "AWS"]
 
 [[rules]]
-    description = AWS Manager ID"
+    description = AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]

+ 1 - 1
test_data/test_configs/bad_aws_key_file_regex.toml

@@ -4,7 +4,7 @@
 	tags = ["key", "AWS"]
 
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]
 

+ 1 - 1
test_data/test_configs/bad_aws_key_global_allowlist_file.toml

@@ -1,5 +1,5 @@
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]
 

+ 1 - 1
test_data/test_configs/bad_regex_aws_key.toml

@@ -4,6 +4,6 @@
 	tags = ["key", "AWS"]
 
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     tags = ["key", "AWS"]

+ 16 - 0
test_data/test_configs/bug_453.toml

@@ -0,0 +1,16 @@
+title = "gitleaks config"
+
+[[rules]]
+	description = "Asymmetric Private Key"
+	regex = '''-----BEGIN ((EC|PGP|DSA|RSA|OPENSSH) )?PRIVATE KEY( BLOCK)?-----'''
+	tags = ["key", "AsymmetricPrivateKey"]
+
+[[rules]]
+	description = "Passwords"
+	regex = '''(?i)password=(?i)'''
+	tags = ["key", "Generic"]
+
+[[rules]]
+	description = "secret secret.com"
+	regex = '''(?i)secret\.com'''
+	tags = ["key", "custom"]

+ 1 - 1
test_data/test_configs/large.toml

@@ -122,7 +122,7 @@ title = "gitleaks config"
 	tags = ["key", "twilio"]
 
 [[rules]]
-	description = "AWS Manager ID"
+	description = "AWS Access Key"
 	regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
 	tags = ["key", "AWS"]
 		[rules.allowlist]

+ 1 - 1
test_data/test_configs/large_with_global_allowlist_regex.toml

@@ -122,7 +122,7 @@ title = "gitleaks config"
 	tags = ["key", "twilio"]
 
 [[rules]]
-	description = "AWS Manager ID"
+	description = "AWS Access Key"
 	regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
 	tags = ["key", "AWS"]
 		[rules.allowlist]

+ 1 - 1
test_data/test_configs/regex_filepath.toml

@@ -1,6 +1,6 @@
 
 [[rules]]
-    description = "AWS Manager ID"
+    description = "AWS Access Key"
     regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
     path = '''config(guration)?'''
     tags = ["key", "AWS"]

+ 30 - 0
test_data/test_dir1_aws_leak.json

@@ -0,0 +1,30 @@
+[
+ {
+  "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
+  "offender": "AKIAIO5FODNN7EXAMPLE",
+  "commit": "0000000000000000000000000000000000000000",
+  "repo": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "",
+  "author": "",
+  "email": "",
+  "file": "../test_data/test_repos/test_dir_1/server.test.py",
+  "date": "1970-01-01T00:00:00Z",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
+  "offender": "AKIAIO5FODNN7EXAMPLE",
+  "commit": "0000000000000000000000000000000000000000",
+  "repo": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "",
+  "author": "",
+  "email": "",
+  "file": "../test_data/test_repos/test_dir_1/server.test2.py",
+  "date": "1970-01-01T00:00:00Z",
+  "tags": "key, AWS"
+ }
+]

+ 16 - 0
test_data/test_file1_aws_leak.json

@@ -0,0 +1,16 @@
+[
+ {
+  "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
+  "offender": "AKIAIO5FODNN7EXAMPLE",
+  "commit": "0000000000000000000000000000000000000000",
+  "repo": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "",
+  "author": "",
+  "email": "",
+  "file": "../test_data/test_repos/test_dir_1/server.test.py",
+  "date": "1970-01-01T00:00:00Z",
+  "tags": "key, AWS"
+ }
+]

+ 217 - 44
test_data/test_local_owner_aws_leak.json

@@ -5,44 +5,47 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_1",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "commit 1 with secrets\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T09:29:27-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
-  "lineNumber": 5,
+  "lineNumber": 4,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding another one\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 5,
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "wait this is actually adding an aws secret\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
@@ -50,44 +53,175 @@
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding aws key\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "const AWSKEY = \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 4,
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "84ac4e80d4dbf2c968b64e9d4005f5079795bb81",
   "repo": "test_repo_3",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "more secrets\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:54:08-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 6,
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "deea550dd6c7acaf0e59432600593533984a2125",
   "repo": "test_repo_3",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "dev branch\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:35:03-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
+ },
+ {
+  "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": 4,
+  "offender": "AKIALALEMEL33243OLIB",
+  "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
+  "repo": "test_repo_3",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "adding another one\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T13:12:08-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 3,
+  "offender": "AKIALALEMEL33243OLIA",
+  "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
+  "repo": "test_repo_3",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "wait this is actually adding an aws secret\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T13:01:27-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
+  "lineNumber": 3,
+  "offender": "AKIALALEMEL33243OLIA",
+  "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
+  "repo": "test_repo_3",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "adding aws key\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T12:58:39-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "const AWSKEY = \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 3,
+  "offender": "AKIALALEMEL33243OLIA",
+  "commit": "84ac4e80d4dbf2c968b64e9d4005f5079795bb81",
+  "repo": "test_repo_4",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "more secrets\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T13:54:08-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 5,
+  "offender": "AKIALALEMEL33243OLIA",
+  "commit": "deea550dd6c7acaf0e59432600593533984a2125",
+  "repo": "test_repo_4",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "dev branch\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T13:35:03-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": 4,
+  "offender": "AKIALALEMEL33243OLIB",
+  "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
+  "repo": "test_repo_4",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "adding another one\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T13:12:08-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 3,
+  "offender": "AKIALALEMEL33243OLIA",
+  "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
+  "repo": "test_repo_4",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "wait this is actually adding an aws secret\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T13:01:27-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
+  "lineNumber": 3,
+  "offender": "AKIALALEMEL33243OLIA",
+  "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
+  "repo": "test_repo_4",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "adding aws key\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T12:58:39-04:00",
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPL2'",
@@ -95,14 +229,15 @@
   "offender": "AKIAIO5FODNN7EXAMPL2",
   "commit": "ca71fcdeda15f25f0cc661d90e8785c255925c27",
   "repo": "test_repo_5",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "introduce more secrets\n",
   "author": "Zach Rice",
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:08:04-05:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPLE'",
@@ -110,14 +245,31 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "1f2a4abc47dabf991e6af6f9770867ce0ac1f360",
   "repo": "test_repo_5",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "introduce secrets\n",
   "author": "Zach Rice",
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:07:34-05:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
+ },
+ {
+  "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
+  "offender": "AKIAIO5FODNN7EXAMPLE",
+  "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
+  "repo": "test_repo_6",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "commit 1 with secrets\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "server.test.py",
+  "date": "2019-10-24T09:29:27-04:00",
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id=AKIAIO5FODNN7EXAMPLE",
@@ -125,14 +277,15 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
   "repo": "test_repo_6",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "Adding some secrets in config folder\n\n",
   "author": "Noel Algora",
   "email": "noealgigu@gmail.com",
   "file": "config/application.properties",
   "date": "2020-02-24T14:13:15-05:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
@@ -140,14 +293,15 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "748f11eaf2c38c3cf0ac6a22e44208777e79fa6f",
   "repo": "test_repo_8",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "Merge pull request #1 from zricethezav/additional-secret-branch\n\nUpdate dummy.txt",
   "author": "Zachary Rice",
   "email": "zricer@protonmail.com",
   "file": "dummy.txt",
   "date": "2020-07-25T14:44:48-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
@@ -155,14 +309,15 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "ce7e8177bbf8a172c06b6a1e370a374d5c19f660",
   "repo": "test_repo_8",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "dummy.txt w/ text",
   "author": "Zachary Rice",
   "email": "zricer@protonmail.com",
   "file": "dummy.txt",
   "date": "2020-07-25T14:39:11-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
@@ -170,28 +325,46 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "9267bc86ec1497471cbc6f3308f3527f7ef34b9d",
   "repo": "test_repo_8",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "Update dummy.txt",
   "author": "Zachary Rice",
   "email": "zricer@protonmail.com",
   "file": "dummy.txt",
   "date": "2020-07-25T14:41:11-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
+ },
+ {
+  "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
+  "offender": "AKIAIO5FODNN7EXAMPLE",
+  "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
+  "repo": "test_repo_9",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "commit 1 with secrets\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "server.test.py",
+  "date": "2019-10-24T09:29:27-04:00",
+  "tags": "key, AWS"
  },
  {
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE', #gitleaks:allow",
-  "lineNumber": 6,
+  "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "8d1fb60d2d80f0590f191ed5ace1e45ef780909a",
   "repo": "test_repo_9",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "gitleaks allow secret\n",
   "author": "Zach Rice",
   "email": "zrice@gitlab.com",
   "file": "server.test.py",
   "date": "2020-08-12T13:36:20-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  }
 ]

+ 213 - 56
test_data/test_local_owner_aws_leak_allowlist_repo.json

@@ -1,33 +1,35 @@
 [
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
-  "lineNumber": 5,
+  "lineNumber": 4,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding another one\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 5,
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "wait this is actually adding an aws secret\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
@@ -35,44 +37,175 @@
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding aws key\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "const AWSKEY = \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 4,
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "84ac4e80d4dbf2c968b64e9d4005f5079795bb81",
   "repo": "test_repo_3",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "more secrets\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:54:08-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 6,
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "deea550dd6c7acaf0e59432600593533984a2125",
   "repo": "test_repo_3",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "dev branch\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:35:03-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
+ },
+ {
+  "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": 4,
+  "offender": "AKIALALEMEL33243OLIB",
+  "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
+  "repo": "test_repo_3",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "adding another one\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T13:12:08-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 3,
+  "offender": "AKIALALEMEL33243OLIA",
+  "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
+  "repo": "test_repo_3",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "wait this is actually adding an aws secret\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T13:01:27-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
+  "lineNumber": 3,
+  "offender": "AKIALALEMEL33243OLIA",
+  "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
+  "repo": "test_repo_3",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "adding aws key\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T12:58:39-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "const AWSKEY = \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 3,
+  "offender": "AKIALALEMEL33243OLIA",
+  "commit": "84ac4e80d4dbf2c968b64e9d4005f5079795bb81",
+  "repo": "test_repo_4",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "more secrets\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T13:54:08-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 5,
+  "offender": "AKIALALEMEL33243OLIA",
+  "commit": "deea550dd6c7acaf0e59432600593533984a2125",
+  "repo": "test_repo_4",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "dev branch\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T13:35:03-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": 4,
+  "offender": "AKIALALEMEL33243OLIB",
+  "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
+  "repo": "test_repo_4",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "adding another one\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T13:12:08-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 3,
+  "offender": "AKIALALEMEL33243OLIA",
+  "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
+  "repo": "test_repo_4",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "wait this is actually adding an aws secret\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T13:01:27-04:00",
+  "tags": "key, AWS"
+ },
+ {
+  "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
+  "lineNumber": 3,
+  "offender": "AKIALALEMEL33243OLIA",
+  "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
+  "repo": "test_repo_4",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "adding aws key\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "secrets.md",
+  "date": "2019-10-25T12:58:39-04:00",
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPL2'",
@@ -80,14 +213,15 @@
   "offender": "AKIAIO5FODNN7EXAMPL2",
   "commit": "ca71fcdeda15f25f0cc661d90e8785c255925c27",
   "repo": "test_repo_5",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "introduce more secrets\n",
   "author": "Zach Rice",
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:08:04-05:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPLE'",
@@ -95,29 +229,15 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "1f2a4abc47dabf991e6af6f9770867ce0ac1f360",
   "repo": "test_repo_5",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "introduce secrets\n",
   "author": "Zach Rice",
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:07:34-05:00",
-  "tags": "key, AWS",
-  "operation": "addition"
- },
- {
-  "line": "aws_access_key_id=AKIAIO5FODNN7EXAMPLE",
-  "lineNumber": 3,
-  "offender": "AKIAIO5FODNN7EXAMPLE",
-  "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
-  "repo": "test_repo_6",
-  "rule": "AWS Manager ID",
-  "commitMessage": "Adding some secrets in config folder\n\n",
-  "author": "Noel Algora",
-  "email": "noealgigu@gmail.com",
-  "file": "config/application.properties",
-  "date": "2020-02-24T14:13:15-05:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
@@ -125,14 +245,31 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_6",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "commit 1 with secrets\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T09:29:27-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
+ },
+ {
+  "line": "aws_access_key_id=AKIAIO5FODNN7EXAMPLE",
+  "lineNumber": 3,
+  "offender": "AKIAIO5FODNN7EXAMPLE",
+  "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
+  "repo": "test_repo_6",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "Adding some secrets in config folder\n\n",
+  "author": "Noel Algora",
+  "email": "noealgigu@gmail.com",
+  "file": "config/application.properties",
+  "date": "2020-02-24T14:13:15-05:00",
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
@@ -140,14 +277,15 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "748f11eaf2c38c3cf0ac6a22e44208777e79fa6f",
   "repo": "test_repo_8",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "Merge pull request #1 from zricethezav/additional-secret-branch\n\nUpdate dummy.txt",
   "author": "Zachary Rice",
   "email": "zricer@protonmail.com",
   "file": "dummy.txt",
   "date": "2020-07-25T14:44:48-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
@@ -155,14 +293,15 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "ce7e8177bbf8a172c06b6a1e370a374d5c19f660",
   "repo": "test_repo_8",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "dummy.txt w/ text",
   "author": "Zachary Rice",
   "email": "zricer@protonmail.com",
   "file": "dummy.txt",
   "date": "2020-07-25T14:39:11-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
@@ -170,28 +309,46 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "9267bc86ec1497471cbc6f3308f3527f7ef34b9d",
   "repo": "test_repo_8",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "Update dummy.txt",
   "author": "Zachary Rice",
   "email": "zricer@protonmail.com",
   "file": "dummy.txt",
   "date": "2020-07-25T14:41:11-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
+ },
+ {
+  "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
+  "offender": "AKIAIO5FODNN7EXAMPLE",
+  "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
+  "repo": "test_repo_9",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "commit 1 with secrets\n",
+  "author": "zach rice",
+  "email": "zricer@protonmail.com",
+  "file": "server.test.py",
+  "date": "2019-10-24T09:29:27-04:00",
+  "tags": "key, AWS"
  },
  {
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE', #gitleaks:allow",
-  "lineNumber": 6,
+  "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "8d1fb60d2d80f0590f191ed5ace1e45ef780909a",
   "repo": "test_repo_9",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "gitleaks allow secret\n",
   "author": "Zach Rice",
   "email": "zrice@gitlab.com",
   "file": "server.test.py",
   "date": "2020-08-12T13:36:20-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  }
 ]

+ 31 - 24
test_data/test_local_owner_aws_leak_depth_2.json

@@ -5,44 +5,47 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_1",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "commit 1 with secrets\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T09:29:27-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
-  "lineNumber": 5,
+  "lineNumber": 4,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding another one\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "const AWSKEY = \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 4,
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "84ac4e80d4dbf2c968b64e9d4005f5079795bb81",
   "repo": "test_repo_3",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "more secrets\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:54:08-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPL2'",
@@ -50,14 +53,15 @@
   "offender": "AKIAIO5FODNN7EXAMPL2",
   "commit": "ca71fcdeda15f25f0cc661d90e8785c255925c27",
   "repo": "test_repo_5",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "introduce more secrets\n",
   "author": "Zach Rice",
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:08:04-05:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id=AKIAIO5FODNN7EXAMPLE",
@@ -65,14 +69,15 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
   "repo": "test_repo_6",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "Adding some secrets in config folder\n\n",
   "author": "Noel Algora",
   "email": "noealgigu@gmail.com",
   "file": "config/application.properties",
   "date": "2020-02-24T14:13:15-05:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
@@ -80,28 +85,30 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "748f11eaf2c38c3cf0ac6a22e44208777e79fa6f",
   "repo": "test_repo_8",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "Merge pull request #1 from zricethezav/additional-secret-branch\n\nUpdate dummy.txt",
   "author": "Zachary Rice",
   "email": "zricer@protonmail.com",
   "file": "dummy.txt",
   "date": "2020-07-25T14:44:48-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE', #gitleaks:allow",
-  "lineNumber": 6,
+  "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "8d1fb60d2d80f0590f191ed5ace1e45ef780909a",
   "repo": "test_repo_9",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "gitleaks allow secret\n",
   "author": "Zach Rice",
   "email": "zrice@gitlab.com",
   "file": "server.test.py",
   "date": "2020-08-12T13:36:20-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  }
 ]

+ 3 - 3
test_data/test_local_repo_eight.json

@@ -5,7 +5,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "748f11eaf2c38c3cf0ac6a22e44208777e79fa6f",
   "repo": "test_repo_8",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "Merge pull request #1 from zricethezav/additional-secret-branch\n\nUpdate dummy.txt",
   "author": "Zachary Rice",
   "email": "zricer@protonmail.com",
@@ -20,7 +20,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "ce7e8177bbf8a172c06b6a1e370a374d5c19f660",
   "repo": "test_repo_8",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "dummy.txt w/ text",
   "author": "Zachary Rice",
   "email": "zricer@protonmail.com",
@@ -35,7 +35,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "9267bc86ec1497471cbc6f3308f3527f7ef34b9d",
   "repo": "test_repo_8",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "Update dummy.txt",
   "author": "Zachary Rice",
   "email": "zricer@protonmail.com",

+ 2 - 2
test_data/test_local_repo_five_files_at_commit.json

@@ -5,7 +5,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "a4c9fb737d5552fd96fce5cc7eedb23353ba9ed0",
   "repo": "test_repo_5",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "even more secrets\n",
   "author": "Zach Rice",
   "email": "zrice@gitlab.com",
@@ -20,7 +20,7 @@
   "offender": "AKIAIO5FODNN7EXAMPL2",
   "commit": "a4c9fb737d5552fd96fce5cc7eedb23353ba9ed0",
   "repo": "test_repo_5",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "even more secrets\n",
   "author": "Zach Rice",
   "email": "zrice@gitlab.com",

+ 2 - 2
test_data/test_local_repo_five_files_at_latest_commit.json

@@ -5,7 +5,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "a4c9fb737d5552fd96fce5cc7eedb23353ba9ed0",
   "repo": "test_repo_5",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "even more secrets\n",
   "author": "Zach Rice",
   "email": "zrice@gitlab.com",
@@ -20,7 +20,7 @@
   "offender": "AKIAIO5FODNN7EXAMPL2",
   "commit": "a4c9fb737d5552fd96fce5cc7eedb23353ba9ed0",
   "repo": "test_repo_5",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "even more secrets\n",
   "author": "Zach Rice",
   "email": "zrice@gitlab.com",

+ 8 - 6
test_data/test_local_repo_four_alt_config_entropy.json

@@ -1,32 +1,34 @@
 [
  {
   "line": "const AWSSECRET = \"99432bfewaf823ec3294e231\"",
-  "lineNumber": 5,
+  "lineNumber": 4,
   "offender": "\"99432bfewaf823ec3294e231\"",
   "commit": "84ac4e80d4dbf2c968b64e9d4005f5079795bb81",
   "repo": "test_repo_4",
+  "repoURL": "",
+  "leakURL": "",
   "rule": "entropy",
   "commitMessage": "more secrets\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:54:08-04:00",
-  "tags": "entropy",
-  "operation": "addition"
+  "tags": "entropy"
  },
  {
   "line": "    const AWSSECRET = \"99432bfewaf823ec3294e231\"",
-  "lineNumber": 6,
+  "lineNumber": 5,
   "offender": "\"99432bfewaf823ec3294e231\"",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_4",
+  "repoURL": "",
+  "leakURL": "",
   "rule": "entropy",
   "commitMessage": "adding another one\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "entropy",
-  "operation": "addition"
+  "tags": "entropy"
  }
 ]

+ 10 - 8
test_data/test_local_repo_four_leaks_commit_timerange.json

@@ -1,32 +1,34 @@
 [
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
-  "lineNumber": 5,
+  "lineNumber": 4,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_4",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding another one\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 5,
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_4",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "wait this is actually adding an aws secret\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  }
 ]

+ 1 - 1
test_data/test_local_repo_nine_aws_leak.json

@@ -5,7 +5,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_9",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "commit 1 with secrets\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",

+ 1 - 1
test_data/test_local_repo_one_aws_leak.json

@@ -5,7 +5,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_1",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "commit 1 with secrets\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",

+ 1 - 1
test_data/test_local_repo_one_aws_leak_and_file_leak.json

@@ -5,7 +5,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_1",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "commit 1 with secrets\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",

+ 1 - 1
test_data/test_local_repo_one_aws_leak_commit.json

@@ -5,7 +5,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_1",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "commit 1 with secrets\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",

+ 5 - 4
test_data/test_local_repo_one_aws_leak_uncommitted.json

@@ -5,13 +5,14 @@
   "offender": "AKIAIO5FODNN7DXAMPLE",
   "commit": "0000000000000000000000000000000000000000",
   "repo": "test_repo_1",
-  "rule": "AWS Manager ID",
-  "commitMessage": "***STAGED CHANGES***",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
+  "commitMessage": "",
   "author": "",
   "email": "",
   "file": "server.test.py",
   "date": "1970-01-01T00:00:00Z",
-  "tags": "key, AWS",
-  "operation": "equal"
+  "tags": "key, AWS"
  }
 ]

+ 2 - 2
test_data/test_local_repo_seven_aws_leak_uncommitted.json

@@ -5,8 +5,8 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "0000000000000000000000000000000000000000",
   "repo": "test_repo_7",
-  "rule": "AWS Manager ID",
-  "commitMessage": "***STAGED CHANGES***",
+  "rule": "AWS Access Key",
+  "commitMessage": "",
   "author": "",
   "email": "",
   "file": "file",

+ 1 - 1
test_data/test_local_repo_six.json

@@ -33,7 +33,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "95cf0237f8cdfc0686a29df08260d8635f46c9b0",
   "repo": "test_repo_6",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "AWS Key in properties\n\n",
   "author": "Noel Algora",
   "email": "noealgigu@gmail.com",

+ 1 - 1
test_data/test_local_repo_six_filepath.json

@@ -5,7 +5,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
   "repo": "test_repo_6",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "Adding some secrets in config folder\n\n",
   "author": "Noel Algora",
   "email": "noealgigu@gmail.com",

+ 1 - 1
test_data/test_local_repo_six_leaks_since_date.json

@@ -5,7 +5,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
   "repo": "test_repo_6",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "Adding some secrets in config folder\n\n",
   "author": "Noel Algora",
   "email": "noealgigu@gmail.com",

+ 1 - 1
test_data/test_local_repo_six_leaks_until_date.json

@@ -5,7 +5,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_6",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "commit 1 with secrets\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",

+ 1 - 1
test_data/test_local_repo_six_path_globally_allowlisted.json

@@ -5,7 +5,7 @@
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_6",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "commit 1 with secrets\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",

+ 19 - 15
test_data/test_local_repo_three_leaks.json

@@ -1,48 +1,51 @@
 [
  {
   "line": "AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 6,
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "deea550dd6c7acaf0e59432600593533984a2125",
   "repo": "test_repo_3",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "dev branch\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:35:03-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
-  "lineNumber": 5,
+  "lineNumber": 4,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_3",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding another one\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 5,
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_3",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "wait this is actually adding an aws secret\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
@@ -50,13 +53,14 @@
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_3",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding aws key\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  }
 ]

+ 14 - 11
test_data/test_local_repo_three_leaks_with_report_groups.json

@@ -1,33 +1,35 @@
 [
  {
   "line": "AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 6,
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "deea550dd6c7acaf0e59432600593533984a2125",
   "repo": "test_repo_3",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "dev branch\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:35:03-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 5,
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_3",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "wait this is actually adding an aws secret\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
@@ -35,13 +37,14 @@
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_3",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding aws key\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  }
 ]

+ 5 - 4
test_data/test_local_repo_two_allowlist_commits.json

@@ -1,17 +1,18 @@
 [
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
-  "lineNumber": 5,
+  "lineNumber": 4,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding another one\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  }
 ]

+ 14 - 11
test_data/test_local_repo_two_leaks.json

@@ -1,33 +1,35 @@
 [
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
-  "lineNumber": 5,
+  "lineNumber": 4,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding another one\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 5,
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "wait this is actually adding an aws secret\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
@@ -35,13 +37,14 @@
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding aws key\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  }
 ]

+ 9 - 7
test_data/test_local_repo_two_leaks_commit_from.json

@@ -1,18 +1,19 @@
 [
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 5,
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "wait this is actually adding an aws secret\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
@@ -20,13 +21,14 @@
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding aws key\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  }
 ]

+ 9 - 7
test_data/test_local_repo_two_leaks_commit_range.json

@@ -1,18 +1,19 @@
 [
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 5,
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "wait this is actually adding an aws secret\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
@@ -20,13 +21,14 @@
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding aws key\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  }
 ]

+ 5 - 4
test_data/test_local_repo_two_leaks_commit_to.json

@@ -1,17 +1,18 @@
 [
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
-  "lineNumber": 5,
+  "lineNumber": 4,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding another one\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  }
 ]

+ 5 - 4
test_data/test_local_repo_two_leaks_commit_to_from.json

@@ -1,17 +1,18 @@
 [
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 5,
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "wait this is actually adding an aws secret\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  }
 ]

+ 6 - 6
test_data/test_local_repo_two_leaks_deletion.json

@@ -5,7 +5,7 @@
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "f61cd8587b7ac1d75a89a0c9af870a2f24c60263",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "rm secrets again\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
@@ -20,7 +20,7 @@
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "adding another one\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
@@ -35,7 +35,7 @@
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "996865bb912f3bc45898a370a13aadb315014b55",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "committing pem\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
@@ -50,7 +50,7 @@
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "wait this is actually adding an aws secret\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
@@ -65,7 +65,7 @@
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "wait this is actually adding an aws secret\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
@@ -80,7 +80,7 @@
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "rule": "AWS Access Key",
   "commitMessage": "adding aws key\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",

+ 9 - 7
test_data/test_local_repo_two_leaks_file_commit_range.json

@@ -1,18 +1,19 @@
 [
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
-  "lineNumber": 5,
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "wait this is actually adding an aws secret\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  },
  {
   "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
@@ -20,13 +21,14 @@
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_2",
-  "rule": "AWS Manager ID",
+  "repoURL": "",
+  "leakURL": "",
+  "rule": "AWS Access Key",
   "commitMessage": "adding aws key\n",
   "author": "zach rice",
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS",
-  "operation": "addition"
+  "tags": "key, AWS"
  }
 ]

+ 1 - 0
test_data/test_regex_allowlist.json.got

@@ -0,0 +1 @@
+null

+ 1 - 0
test_data/test_repos/no_repo/tmp.tmp

@@ -0,0 +1 @@
+tmp

+ 9 - 0
test_data/test_repos/test_dir_1/server.test.py

@@ -0,0 +1,9 @@
+# Do not hard code credentials
+client = boto3.client(
+    's3',
+    # Hard coded strings as credentials, not recommended.
+    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',
+    aws_secret_access_key='ABCDEF+c2L7yXeGvUyrPgYsDnWRRC1AYEXAMPLE'
+)
+
+# adding another line

+ 9 - 0
test_data/test_repos/test_dir_1/server.test2.py

@@ -0,0 +1,9 @@
+# Do not hard code credentials
+client = boto3.client(
+    's3',
+    # Hard coded strings as credentials, not recommended.
+    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',
+    aws_secret_access_key='ABCDEF+c2L7yXeGvUyrPgYsDnWRRC1AYEXAMPLE'
+)
+
+# adding another line

+ 5 - 4
test_data/test_repos/test_repo_4/gitleaks.toml

@@ -2,7 +2,8 @@
 	description = "entropy"
 	regex = '''['|"]([0-9a-zA-Z-._{}$\/\+=]{20,120})['|"]'''
 	tags = ["entropy"]
-		[[rules.Entropies]]
-			Min = "3.3"
-			Max = "3.5"
-			Group = "1"
+        [[rules.Entropies]]
+            Min = "3.3"
+            Max = "3.5"
+            Group = "1"
+