Просмотр исходного кода

Add line number to report plus some other things (#409)

* Init refactor

* think i got it working

* we got em

* Updating tests

* fixing tests

* Better comments, more test fixing

* Bumping to v5

* Fixing golint messages

* Replacing usage of audit with scan in comments

* bumping go-git

* Updating to v5 in some other spots
Zachary Rice 5 лет назад
Родитель
Коммит
6ca7a11d88
50 измененных файлов с 1483 добавлено и 1167 удалено
  1. 1 1
      .travis.yml
  2. 2 2
      Makefile
  3. 0 65
      audit/audit.go
  4. 0 430
      audit/repo.go
  5. 0 485
      audit/util.go
  6. 5 5
      config/config.go
  7. 2 1
      config/config_test.go
  8. 1 1
      examples/regex_and_entropy_config.toml
  9. 2 2
      go.mod
  10. 4 0
      go.sum
  11. 19 15
      hosts/github.go
  12. 10 10
      hosts/gitlab.go
  13. 11 6
      hosts/host.go
  14. 4 3
      hosts/hosts_test.go
  15. 12 12
      main.go
  16. 12 10
      manager/manager.go
  17. 8 8
      manager/manager_test.go
  18. 15 15
      options/options.go
  19. 296 0
      scan/repo.go
  20. 396 0
      scan/rule.go
  21. 455 0
      scan/scan.go
  22. 19 19
      scan/scan_test.go
  23. 29 11
      test_data/test_local_owner_aws_leak.json
  24. 16 6
      test_data/test_local_owner_aws_leak_depth_2.json
  25. 29 11
      test_data/test_local_owner_aws_leak_whitelist_repo.json
  26. 4 2
      test_data/test_local_repo_five_at_latest_commit.json
  27. 4 2
      test_data/test_local_repo_five_commit.json
  28. 6 2
      test_data/test_local_repo_five_files_at_commit.json
  29. 6 2
      test_data/test_local_repo_five_files_at_latest_commit.json
  30. 6 2
      test_data/test_local_repo_four_alt_config_entropy.json
  31. 6 2
      test_data/test_local_repo_four_leaks_commit_timerange.json
  32. 3 1
      test_data/test_local_repo_one_aws_leak.json
  33. 9 3
      test_data/test_local_repo_one_aws_leak_and_file_leak.json
  34. 3 1
      test_data/test_local_repo_one_aws_leak_commit.json
  35. 3 1
      test_data/test_local_repo_one_aws_leak_uncommitted.json
  36. 3 1
      test_data/test_local_repo_seven_aws_leak_uncommitted.json
  37. 6 2
      test_data/test_local_repo_six_filename.json
  38. 3 1
      test_data/test_local_repo_six_filepath.json
  39. 3 1
      test_data/test_local_repo_six_filepath_filename.json
  40. 3 1
      test_data/test_local_repo_six_leaks_since_date.json
  41. 3 1
      test_data/test_local_repo_six_leaks_until_date.json
  42. 3 1
      test_data/test_local_repo_six_path_globally_whitelisted.json
  43. 13 5
      test_data/test_local_repo_three_leaks.json
  44. 10 4
      test_data/test_local_repo_two_leaks.json
  45. 7 3
      test_data/test_local_repo_two_leaks_commit_from.json
  46. 3 1
      test_data/test_local_repo_two_leaks_commit_range.json
  47. 3 1
      test_data/test_local_repo_two_leaks_commit_to.json
  48. 19 7
      test_data/test_local_repo_two_leaks_deletion.json
  49. 3 1
      test_data/test_local_repo_two_whitelist_commits.json
  50. 3 1
      test_data/test_regex_entropy.json

+ 1 - 1
.travis.yml

@@ -1,6 +1,6 @@
 language: go
 go:
-- 1.13.x
+- 1.14.x
 services:
 - docker
 script:

+ 2 - 2
Makefile

@@ -2,8 +2,8 @@
 
 VERSION := `git fetch --tags && git tag | sort -V | tail -1`
 PKG=github.com/zricethezav/gitleaks
-LDFLAGS=-ldflags "-X=github.com/zricethezav/gitleaks/v4/version.Version=$(VERSION)"
-_LDFLAGS="github.com/zricethezav/gitleaks/v4/version.Version=$(VERSION)"
+LDFLAGS=-ldflags "-X=github.com/zricethezav/gitleaks/v5/version.Version=$(VERSION)"
+_LDFLAGS="github.com/zricethezav/gitleaks/v5/version.Version=$(VERSION)"
 COVER=--cover --coverprofile=cover.out
 
 test-cover:

+ 0 - 65
audit/audit.go

@@ -1,65 +0,0 @@
-package audit
-
-import (
-	"fmt"
-	"io/ioutil"
-	"path"
-
-	"github.com/zricethezav/gitleaks/v4/manager"
-
-	log "github.com/sirupsen/logrus"
-)
-
-// Run accepts a manager and begins an audit based on the options/configs set in the manager.
-func Run(m *manager.Manager) error {
-	if m.Opts.OwnerPath != "" {
-		files, err := ioutil.ReadDir(m.Opts.OwnerPath)
-		if err != nil {
-			return err
-		}
-		for _, f := range files {
-			if !f.IsDir() {
-				continue
-			}
-			m.Opts.RepoPath = fmt.Sprintf("%s/%s", m.Opts.OwnerPath, f.Name())
-			if err := runHelper(NewRepo(m)); err != nil {
-				log.Warnf("%s is not a git repo, skipping", f.Name())
-			}
-		}
-		return nil
-	}
-
-	return runHelper(NewRepo(m))
-}
-
-func runHelper(r *Repo) error {
-	// Ignore whitelisted repos
-	for _, wlRepo := range r.Manager.Config.Whitelist.Repos {
-		if RegexMatched(r.Manager.Opts.RepoPath, wlRepo) {
-			return nil
-		}
-		if RegexMatched(r.Manager.Opts.Repo, wlRepo) {
-			return nil
-		}
-	}
-	if r.Manager.Opts.OpenLocal() {
-		r.Name = path.Base(r.Manager.Opts.RepoPath)
-		if err := r.Open(); err != nil {
-			return err
-		}
-
-		// Check if we are checking uncommitted files. This is the default behavior
-		// for a "$ gitleaks" command with no options set
-		if r.Manager.Opts.CheckUncommitted() {
-			if err := r.AuditUncommitted(); err != nil {
-				return err
-			}
-			return nil
-		}
-	} else {
-		if err := r.Clone(nil); err != nil {
-			return err
-		}
-	}
-	return r.Audit()
-}

+ 0 - 430
audit/repo.go

@@ -1,430 +0,0 @@
-package audit
-
-import (
-	"bytes"
-	"context"
-	"crypto/md5"
-	"fmt"
-	"github.com/go-git/go-git/v5"
-	"io"
-	"os"
-	"path"
-	"path/filepath"
-	"sync"
-	"time"
-
-	"github.com/zricethezav/gitleaks/v4/config"
-	"github.com/zricethezav/gitleaks/v4/manager"
-
-	"github.com/BurntSushi/toml"
-	"github.com/go-git/go-billy/v5"
-	"github.com/go-git/go-git/v5/plumbing"
-	"github.com/go-git/go-git/v5/plumbing/object"
-	"github.com/go-git/go-git/v5/plumbing/storer"
-	"github.com/go-git/go-git/v5/storage/memory"
-	"github.com/sergi/go-diff/diffmatchpatch"
-	log "github.com/sirupsen/logrus"
-)
-
-// Repo wraps a *git.Repository object in addition to a manager object and the name of the repo.
-// Commits are inspected from the *git.Repository object. If a commit is found then we send it
-// via the manager LeakChan where the manager receives and keeps track of all leaks.
-type Repo struct {
-	*git.Repository
-
-	// config is used when the --repo-config option is set.
-	// This allows users to load up configs specific to their repos.
-	// Imagine the scenario where you are doing an audit of a large organization
-	// and you want certain repos to look for specific rules. If those specific repos
-	// have a gitleaks.toml or .gitleaks.toml config then those configs will be used specifically
-	// for those repo audits.
-	config config.Config
-
-	// ctx is used to signal timeouts to running goroutines
-	ctx    context.Context
-	cancel context.CancelFunc
-
-	Name    string
-	Manager *manager.Manager
-}
-
-// NewRepo initializes and returns a Repo struct.
-func NewRepo(m *manager.Manager) *Repo {
-	return &Repo{
-		Manager: m,
-		config:  m.Config,
-		ctx:     context.Background(),
-	}
-}
-
-// Clone will clone a repo and return a Repo struct which contains a go-git repo. The clone method
-// is determined by the clone options set in Manager.metadata.cloneOptions
-func (repo *Repo) Clone(cloneOption *git.CloneOptions) error {
-	var (
-		repository *git.Repository
-		err        error
-	)
-	if cloneOption == nil {
-		cloneOption = repo.Manager.CloneOptions
-	}
-
-	log.Infof("cloning... %s", cloneOption.URL)
-	start := time.Now()
-
-	if repo.Manager.CloneDir != "" {
-		clonePath := fmt.Sprintf("%s/%x", repo.Manager.CloneDir, md5.Sum([]byte(time.Now().String())))
-		repository, err = git.PlainClone(clonePath, false, cloneOption)
-	} else {
-		repository, err = git.Clone(memory.NewStorage(), nil, cloneOption)
-	}
-	if err != nil {
-		return err
-	}
-	repo.Name = filepath.Base(repo.Manager.Opts.Repo)
-	repo.Repository = repository
-	repo.Manager.RecordTime(manager.CloneTime(howLong(start)))
-
-	return nil
-}
-
-func emptyCommit() *object.Commit {
-	return &object.Commit{
-		Hash:    plumbing.Hash{},
-		Message: "***STAGED CHANGES***",
-		Author: object.Signature{
-			Name:  "",
-			Email: "",
-			When:  time.Unix(0, 0).UTC(),
-		},
-	}
-}
-
-// auditEmpty audits an empty repo without any commits. See https://github.com/zricethezav/gitleaks/issues/352
-func (repo *Repo) auditEmpty() error {
-	auditTimeStart := time.Now()
-	wt, err := repo.Worktree()
-	if err != nil {
-		return err
-	}
-
-	status, err := wt.Status()
-	for fn := range status {
-		workTreeBuf := bytes.NewBuffer(nil)
-		workTreeFile, err := wt.Filesystem.Open(fn)
-		if err != nil {
-			continue
-		}
-		if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
-			return err
-		}
-		InspectFile(workTreeBuf.String(), workTreeFile.Name(), emptyCommit(), repo)
-	}
-	repo.Manager.RecordTime(manager.AuditTime(howLong(auditTimeStart)))
-	return nil
-}
-
-// AuditUncommitted will do a `git diff` and scan changed files that are being tracked. This is useful functionality
-// for a pre-commit hook so you can make sure your code does not have any leaks before committing.
-func (repo *Repo) AuditUncommitted() error {
-	// load up alternative config if possible, if not use manager's config
-	if repo.Manager.Opts.RepoConfig {
-		cfg, err := repo.loadRepoConfig()
-		if err != nil {
-			return err
-		}
-		repo.config = cfg
-	}
-
-	if err := repo.setupTimeout(); err != nil {
-		return err
-	}
-
-	r, err := repo.Head()
-	if err == plumbing.ErrReferenceNotFound {
-		// possibly an empty repo, or maybe its not, either way lets scan all the files in the directory
-		return repo.auditEmpty()
-	} else if err != nil {
-		return err
-	}
-
-	auditTimeStart := time.Now()
-
-	c, err := repo.CommitObject(r.Hash())
-	if err != nil {
-		return err
-	}
-	// Staged change so the commit details do not yet exist. Insert empty defaults.
-	c.Hash = plumbing.Hash{}
-	c.Message = "***STAGED CHANGES***"
-	c.Author.Name = ""
-	c.Author.Email = ""
-	c.Author.When = time.Unix(0, 0).UTC()
-
-	prevTree, err := c.Tree()
-	if err != nil {
-		return err
-	}
-	wt, err := repo.Worktree()
-	if err != nil {
-		return err
-	}
-
-	status, err := wt.Status()
-	for fn, state := range status {
-		var (
-			prevFileContents string
-			currFileContents string
-			filename         string
-		)
-
-		if state.Staging != git.Untracked {
-			if state.Staging == git.Deleted {
-				// file in staging has been deleted, aka it is not on the filesystem
-				// so the contents of the file are ""
-				currFileContents = ""
-			} else {
-				workTreeBuf := bytes.NewBuffer(nil)
-				workTreeFile, err := wt.Filesystem.Open(fn)
-				if err != nil {
-					continue
-				}
-				if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
-					return err
-				}
-				currFileContents = workTreeBuf.String()
-				filename = workTreeFile.Name()
-			}
-
-			// get files at HEAD state
-			prevFile, err := prevTree.File(fn)
-			if err != nil {
-				prevFileContents = ""
-
-			} else {
-				prevFileContents, err = prevFile.Contents()
-				if err != nil {
-					return err
-				}
-				if filename == "" {
-					filename = prevFile.Name
-				}
-			}
-
-			diffs := diffmatchpatch.New().DiffMain(prevFileContents, currFileContents, false)
-			var diffContents string
-			for _, d := range diffs {
-				if d.Type == diffmatchpatch.DiffInsert {
-					diffContents += fmt.Sprintf("%s\n", d.Text)
-				}
-			}
-
-			InspectFile(diffContents, filename, c, repo)
-		}
-	}
-
-	if err != nil {
-		return err
-	}
-	repo.Manager.RecordTime(manager.AuditTime(howLong(auditTimeStart)))
-	return nil
-}
-
-// Audit is responsible for scanning the entire history (default behavior) of a
-// git repo. Options that can change the behavior of this function include: --commit, --depth, --branch.
-// See options/options.go for an explanation on these options.
-func (repo *Repo) Audit() error {
-	if err := repo.setupTimeout(); err != nil {
-		return err
-	}
-	if repo.cancel != nil {
-		defer repo.cancel()
-	}
-
-	if repo.Repository == nil {
-		return fmt.Errorf("%s repo is empty", repo.Name)
-	}
-
-	// load up alternative config if possible, if not use manager's config
-	if repo.Manager.Opts.RepoConfig {
-		cfg, err := repo.loadRepoConfig()
-		if err != nil {
-			return err
-		}
-		repo.config = cfg
-	}
-
-	auditTimeStart := time.Now()
-
-	// audit commit patches OR all files at commit. See https://github.com/zricethezav/gitleaks/issues/326
-	if repo.Manager.Opts.Commit != "" {
-		return inspectCommit(repo.Manager.Opts.Commit, repo, inspectCommitPatches)
-	} else if repo.Manager.Opts.FilesAtCommit != "" {
-		return inspectCommit(repo.Manager.Opts.FilesAtCommit, repo, inspectFilesAtCommit)
-	}
-
-	logOpts, err := getLogOptions(repo)
-	if err != nil {
-		return err
-	}
-	cIter, err := repo.Log(logOpts)
-	if err != nil {
-		return err
-	}
-
-	cc := 0
-	semaphore := make(chan bool, howManyThreads(repo.Manager.Opts.Threads))
-	wg := sync.WaitGroup{}
-	err = cIter.ForEach(func(c *object.Commit) error {
-		if c == nil || repo.timeoutReached() || repo.depthReached(cc) {
-			return storer.ErrStop
-		}
-
-		// Check if commit is whitelisted
-		if isCommitWhiteListed(c.Hash.String(), repo.config.Whitelist.Commits) {
-			return nil
-		}
-
-		// Check if at root
-		if len(c.ParentHashes) == 0 {
-			cc++
-			err = inspectFilesAtCommit(c, repo)
-			if err != nil {
-				return err
-			}
-			return nil
-		}
-
-		// increase commit counter
-		cc++
-
-		err = c.Parents().ForEach(func(parent *object.Commit) error {
-			defer func() {
-				if err := recover(); err != nil {
-					// sometimes the patch generation will fail due to a known bug in
-					// sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
-					// Once a fix has been merged I will remove this recover.
-					return
-				}
-			}()
-			if repo.timeoutReached() {
-				return nil
-			}
-			if parent == nil {
-				// shouldn't reach this point but just in case
-				return nil
-			}
-
-			start := time.Now()
-			patch, err := parent.Patch(c)
-			if err != nil {
-				return fmt.Errorf("could not generate patch")
-			}
-			repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
-			wg.Add(1)
-			semaphore <- true
-			go func(c *object.Commit, patch *object.Patch) {
-				defer func() {
-					<-semaphore
-					wg.Done()
-				}()
-				inspectPatch(patch, c, repo)
-			}(c, patch)
-
-			return nil
-		})
-		if c.Hash.String() == repo.Manager.Opts.CommitTo {
-			return storer.ErrStop
-		}
-		return nil
-	})
-
-	wg.Wait()
-	repo.Manager.RecordTime(manager.AuditTime(howLong(auditTimeStart)))
-	repo.Manager.IncrementCommits(cc)
-	return nil
-}
-
-// Open opens a local repo either from repo-path or $PWD
-func (repo *Repo) Open() error {
-	if repo.Manager.Opts.RepoPath != "" {
-		// open git repo from repo path
-		repository, err := git.PlainOpen(repo.Manager.Opts.RepoPath)
-		if err != nil {
-			return err
-		}
-		repo.Repository = repository
-	} else {
-		// open git repo from PWD
-		dir, err := os.Getwd()
-		if err != nil {
-			return err
-		}
-		repository, err := git.PlainOpen(dir)
-		if err != nil {
-			return err
-		}
-		repo.Repository = repository
-		repo.Name = path.Base(dir)
-	}
-	return nil
-}
-
-func (repo *Repo) loadRepoConfig() (config.Config, error) {
-	wt, err := repo.Repository.Worktree()
-	if err != nil {
-		return config.Config{}, err
-	}
-	var f billy.File
-	f, _ = wt.Filesystem.Open(".gitleaks.toml")
-	if f == nil {
-		f, err = wt.Filesystem.Open("gitleaks.toml")
-		if err != nil {
-			return config.Config{}, fmt.Errorf("problem loading repo config: %v", err)
-		}
-	}
-	defer f.Close()
-	var tomlLoader config.TomlLoader
-	_, err = toml.DecodeReader(f, &tomlLoader)
-	return tomlLoader.Parse()
-}
-
-// timeoutReached returns true if the timeout deadline has been met. This function should be used
-// at the top of loops and before potentially long running goroutines (like checking inefficient regexes)
-func (repo *Repo) timeoutReached() bool {
-	if repo.ctx.Err() == context.DeadlineExceeded {
-		return true
-	}
-	return false
-}
-
-// setupTimeout parses the --timeout option and assigns a context with timeout to the manager
-// which will exit early if the timeout has been met.
-func (repo *Repo) setupTimeout() error {
-	if repo.Manager.Opts.Timeout == "" {
-		return nil
-	}
-	timeout, err := time.ParseDuration(repo.Manager.Opts.Timeout)
-	if err != nil {
-		return err
-	}
-
-	repo.ctx, repo.cancel = context.WithTimeout(context.Background(), timeout)
-
-	go func() {
-		select {
-		case <-repo.ctx.Done():
-			if repo.timeoutReached() {
-				log.Warnf("Timeout deadline (%s) exceeded for %s", timeout.String(), repo.Name)
-			}
-		}
-	}()
-	return nil
-}
-
-func (repo *Repo) depthReached(i int) bool {
-	if repo.Manager.Opts.Depth != 0 && repo.Manager.Opts.Depth == i {
-		log.Warnf("Exceeded depth limit (%d)", i)
-		return true
-	}
-	return false
-}

+ 0 - 485
audit/util.go

@@ -1,485 +0,0 @@
-package audit
-
-import (
-	"fmt"
-	"math"
-	"path/filepath"
-	"regexp"
-	"runtime"
-	"strings"
-	"time"
-
-	"github.com/zricethezav/gitleaks/v4/config"
-	"github.com/zricethezav/gitleaks/v4/manager"
-
-	"github.com/go-git/go-git/v5"
-	"github.com/go-git/go-git/v5/plumbing"
-	fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
-	"github.com/go-git/go-git/v5/plumbing/object"
-	log "github.com/sirupsen/logrus"
-)
-
-// Inspect patch accepts a patch, commit, and repo. If the patches contains files that are
-// binary, then gitleaks will skip auditing that file OR if a file is matched on
-// whitelisted files set in the configuration. If a global rule for files is defined and a filename
-// matches said global rule, then a leak is sent to the manager.
-// After that, file chunks are created which are then inspected by InspectString()
-func inspectPatch(patch *object.Patch, c *object.Commit, repo *Repo) {
-	for _, f := range patch.FilePatches() {
-		if repo.timeoutReached() {
-			return
-		}
-		if f.IsBinary() {
-			continue
-		}
-		for _, chunk := range f.Chunks() {
-			if chunk.Type() == fdiff.Add || (repo.Manager.Opts.Deletion && chunk.Type() == fdiff.Delete){
-				InspectFile(chunk.Content(), getFileFullPath(f), c, repo)
-			}
-		}
-	}
-}
-
-// getFileName accepts a file patch and returns the filename
-func getFileFullPath(f fdiff.FilePatch) string {
-	fn := "???"
-	from, to := f.Files()
-	if from != nil {
-		return from.Path()
-	} else if to != nil {
-		return to.Path()
-	}
-
-	return fn
-}
-
-// getFileName accepts a string with full path and returns only path
-func getFilePath(fullpath string) string {
-	return filepath.Dir(fullpath)
-}
-
-// getFileName accepts a string with full path and returns only filename
-func getFileName(fullpath string) string {
-	return filepath.Base(fullpath)
-}
-
-// aws_access_key_id='AKIAIO5FODNN7EXAMPLE',
-// trippedEntropy checks if a given capture group or offender falls in between entropy ranges
-// supplied by a custom gitleaks configuration. Gitleaks do not check entropy by default.
-func trippedEntropy(groups []string, rule config.Rule) bool {
-	for _, e := range rule.Entropies {
-		if len(groups) > e.Group {
-			entropy := shannonEntropy(groups[e.Group])
-			if entropy >= e.Min && entropy <= e.Max {
-				return true
-			}
-		}
-	}
-	return false
-}
-
-// getShannonEntropy https://en.wiktionary.org/wiki/Shannon_entropy
-func shannonEntropy(data string) (entropy float64) {
-	if data == "" {
-		return 0
-	}
-
-	charCounts := make(map[rune]int)
-	for _, char := range data {
-		charCounts[char]++
-	}
-
-	invLength := 1.0 / float64(len(data))
-	for _, count := range charCounts {
-		freq := float64(count) * invLength
-		entropy -= freq * math.Log2(freq)
-	}
-
-	return entropy
-}
-
-// Checks if the given rule has a regex
-func ruleContainRegex(rule config.Rule) bool {
-	if rule.Regex == nil {
-		return false
-	}
-	if rule.Regex.String() == "" {
-		return false
-	}
-	return true
-}
-
-// Checks if the given rule has a file name regex
-func ruleContainFileNameRegex(rule config.Rule) bool {
-	if rule.FileNameRegex == nil {
-		return false
-	}
-	if rule.FileNameRegex.String() == "" {
-		return false
-	}
-	return true
-}
-
-// Checks if the given rule has a file path regex
-func ruleContainFilePathRegex(rule config.Rule) bool {
-	if rule.FilePathRegex == nil {
-		return false
-	}
-	if rule.FilePathRegex.String() == "" {
-		return false
-	}
-	return true
-}
-
-func sendLeak(offender string, line string, filename string, rule config.Rule, c *object.Commit, repo *Repo) {
-	repo.Manager.SendLeaks(manager.Leak{
-		Line:     line,
-		Offender: offender,
-		Commit:   c.Hash.String(),
-		Repo:     repo.Name,
-		Message:  c.Message,
-		Rule:     rule.Description,
-		Author:   c.Author.Name,
-		Email:    c.Author.Email,
-		Date:     c.Author.When,
-		Tags:     strings.Join(rule.Tags, ", "),
-		File:     filename,
-	})
-}
-
-// InspectFile accepts a file content, fullpath of file, commit and repo. If the file is
-// binary OR if a file is matched on whitelisted files set in the configuration, then gitleaks
-// will skip auditing that file. It will check first if rules apply to this file comparing filename
-// and path to their respective rule regexes and inspect file content with inspectFileContents after.
-func InspectFile(content string, fullpath string, c *object.Commit, repo *Repo) {
-
-	filename := getFileName(fullpath)
-	path := getFilePath(fullpath)
-
-	// We want to check if there is a whitelist for this file
-	if len(repo.config.Whitelist.Files) != 0 {
-		for _, reFileName := range repo.config.Whitelist.Files {
-			if RegexMatched(filename, reFileName) {
-				log.Debugf("whitelisted file found, skipping audit of file: %s", filename)
-				return
-			}
-		}
-	}
-
-	// We want to check if there is a whitelist for this path
-	if len(repo.config.Whitelist.Paths) != 0 {
-		for _, reFilePath := range repo.config.Whitelist.Paths {
-			if RegexMatched(path, reFilePath) {
-				log.Debugf("file in whitelisted path found, skipping audit of file: %s", filename)
-				return
-			}
-		}
-	}
-
-	for _, rule := range repo.config.Rules {
-		start := time.Now()
-
-		// For each rule we want to check filename whitelists
-		if isFileNameWhiteListed(filename, rule.Whitelist) || isFilePathWhiteListed(path, rule.Whitelist) {
-			continue
-		}
-
-		// If it has fileNameRegex and it doesnt match we continue to next rule
-		if ruleContainFileNameRegex(rule) && !RegexMatched(filename, rule.FileNameRegex) {
-			continue
-		}
-
-		// If it has filePathRegex and it doesnt match we continue to next rule
-		if ruleContainFilePathRegex(rule) && !RegexMatched(path, rule.FilePathRegex) {
-			continue
-		}
-
-		// If it doesnt contain a content regex then it is a filename regex match
-		if !ruleContainRegex(rule) {
-			sendLeak("Filename/path offender: "+filename, "N/A", fullpath, rule, c, repo)
-		} else {
-			//otherwise we check if it matches content regex
-			inspectFileContents(content, fullpath, rule, c, repo)
-		}
-
-		//	TODO should return filenameRegex if only file rule
-		repo.Manager.RecordTime(manager.RegexTime{
-			Time:  howLong(start),
-			Regex: rule.Regex.String(),
-		})
-	}
-}
-
-// InspectString accepts a string, commit object, repo, and filename. This function iterates over
-// all the rules set by the gitleaks config. If the rule contains entropy checks then entropy will be checked first.
-// Next, if the rule contains a regular expression then that will be checked.
-func inspectFileContents(content string, path string, rule config.Rule, c *object.Commit, repo *Repo) {
-	locs := rule.Regex.FindAllIndex([]byte(content), -1)
-	if len(locs) != 0 {
-		for _, loc := range locs {
-			start := loc[0]
-			end := loc[1]
-			for start != 0 && content[start] != '\n' {
-				start = start - 1
-			}
-			if start != 0 {
-				// skip newline
-				start = start + 1
-			}
-
-			for end < len(content)-1 && content[end] != '\n' {
-				end = end + 1
-			}
-
-			line := content[start:end]
-			offender := content[loc[0]:loc[1]]
-			groups := rule.Regex.FindStringSubmatch(offender)
-
-			if isOffenderWhiteListed(offender, rule.Whitelist) {
-				continue
-			}
-
-			if len(rule.Entropies) != 0 && !trippedEntropy(groups, rule) {
-				continue
-			}
-
-			sendLeak(offender, line, path, rule, c, repo)
-		}
-	}
-}
-
-type commitInspector func(c *object.Commit, repo *Repo) error
-
-// inspectCommit accepts a commit hash, repo, and commit inspecting function. A new commit
-// object will be created from the hash which will be passed into either inspectCommitPatches
-// or inspectFilesAtCommit depending on the options set.
-func inspectCommit(commit string, repo *Repo, f commitInspector) error {
-	if commit == "latest" {
-		ref, err := repo.Repository.Head()
-		if err != nil {
-			return err
-		}
-		commit = ref.Hash().String()
-	}
-	repo.Manager.IncrementCommits(1)
-	h := plumbing.NewHash(commit)
-	c, err := repo.CommitObject(h)
-	if err != nil {
-		return err
-	}
-	return f(c, repo)
-}
-
-// inspectCommitPatches accepts a commit object and a repo. This function is only called when the --commit=
-// option has been set. That option tells gitleaks to look only at a single commit and check the contents
-// of said commit. Similar to inspectPatch(), if the files contained in the commit are a binaries or if they are
-// whitelisted then those files will be skipped.
-func inspectCommitPatches(c *object.Commit, repo *Repo) error {
-	if len(c.ParentHashes) == 0 {
-		err := inspectFilesAtCommit(c, repo)
-		if err != nil {
-			return err
-		}
-	}
-
-	return c.Parents().ForEach(func(parent *object.Commit) error {
-		defer func() {
-			if err := recover(); err != nil {
-				// sometimes the patch generation will fail due to a known bug in
-				// sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
-				// Once a fix has been merged I will remove this recover.
-				return
-			}
-		}()
-		if repo.timeoutReached() {
-			return nil
-		}
-		if parent == nil {
-			return nil
-		}
-		start := time.Now()
-		patch, err := parent.Patch(c)
-		if err != nil {
-			return fmt.Errorf("could not generate patch")
-		}
-		repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
-		inspectPatch(patch, c, repo)
-		return nil
-	})
-}
-
-// inspectFilesAtCommit accepts a commit object and a repo. This function is only called when the --files-at-commit=
-// option has been set. That option tells gitleaks to look only at ALL the files at a commit and check the contents
-// of said commit. Similar to inspectPatch(), if the files contained in the commit are a binaries or if they are
-// whitelisted then those files will be skipped.
-func inspectFilesAtCommit(c *object.Commit, repo *Repo) error {
-	fIter, err := c.Files()
-	if err != nil {
-		return err
-	}
-
-	err = fIter.ForEach(func(f *object.File) error {
-		bin, err := f.IsBinary()
-		if bin || repo.timeoutReached() {
-			return nil
-		} else if err != nil {
-			return err
-		}
-
-		content, err := f.Contents()
-		if err != nil {
-			return err
-		}
-
-		InspectFile(content, f.Name, c, repo)
-
-		return nil
-	})
-	return err
-}
-
-// howManyThreads will return a number 1-GOMAXPROCS which is the number
-// of goroutines that will spawn during gitleaks execution
-func howManyThreads(threads int) int {
-	maxThreads := runtime.GOMAXPROCS(0)
-	if threads == 0 {
-		return 1
-	} else if threads > maxThreads {
-		log.Warnf("%d threads set too high, setting to system max, %d", threads, maxThreads)
-		return maxThreads
-	}
-	return threads
-}
-
-func isCommitWhiteListed(commitHash string, whitelistedCommits []string) bool {
-	for _, hash := range whitelistedCommits {
-		if commitHash == hash {
-			return true
-		}
-	}
-	return false
-}
-
-func isOffenderWhiteListed(offender string, whitelist []config.Whitelist) bool {
-	if len(whitelist) != 0 {
-		for _, wl := range whitelist {
-			if wl.Regex.FindString(offender) != "" {
-				return true
-			}
-		}
-	}
-	return false
-}
-
-func isFileNameWhiteListed(filename string, whitelist []config.Whitelist) bool {
-	if len(whitelist) != 0 {
-		for _, wl := range whitelist {
-			if RegexMatched(filename, wl.File) {
-				return true
-			}
-		}
-	}
-	return false
-}
-
-func isFilePathWhiteListed(filepath string, whitelist []config.Whitelist) bool {
-	if len(whitelist) != 0 {
-		for _, wl := range whitelist {
-			if RegexMatched(filepath, wl.Path) {
-				return true
-			}
-		}
-	}
-	return false
-}
-
-// RegexMatched matched an interface to a regular expression. The interface f can
-// be a string type or go-git *object.File type.
-func RegexMatched(f interface{}, re *regexp.Regexp) bool {
-	if re == nil {
-		return false
-	}
-	switch f.(type) {
-	case nil:
-		return false
-	case string:
-		if re.FindString(f.(string)) != "" {
-			return true
-		}
-		return false
-	case *object.File:
-		if re.FindString(f.(*object.File).Name) != "" {
-			return true
-		}
-		return false
-	}
-	return false
-}
-
-// getLogOptions determines what log options are used when iterating through commits.
-// It is similar to `git log {branch}`. Default behavior is to log ALL branches so
-// gitleaks gets the full git history.
-func getLogOptions(repo *Repo) (*git.LogOptions, error) {
-	var logOpts git.LogOptions
-	const dateformat string = "2006-01-02"
-	const timeformat string = "2006-01-02T15:04:05-0700"
-	if repo.Manager.Opts.CommitFrom != "" {
-		logOpts.From = plumbing.NewHash(repo.Manager.Opts.CommitFrom)
-	}
-	if repo.Manager.Opts.CommitSince != "" {
-		if t, err := time.Parse(timeformat, repo.Manager.Opts.CommitSince); err == nil {
-			logOpts.Since = &t
-		} else if t, err := time.Parse(dateformat, repo.Manager.Opts.CommitSince); err == nil {
-			logOpts.Since = &t
-		} else {
-			return nil, err
-		}
-	}
-	if repo.Manager.Opts.CommitUntil != "" {
-		if t, err := time.Parse(timeformat, repo.Manager.Opts.CommitUntil); err == nil {
-			logOpts.Until = &t
-		} else if t, err := time.Parse(dateformat, repo.Manager.Opts.CommitUntil); err == nil {
-			logOpts.Until = &t
-		} else {
-			return nil, err
-		}
-	}
-	if repo.Manager.Opts.Branch != "" {
-		refs, err := repo.Storer.IterReferences()
-		if err != nil {
-			return nil, err
-		}
-		err = refs.ForEach(func(ref *plumbing.Reference) error {
-			if ref.Name().IsTag() {
-				return nil
-			}
-			// check heads first
-			if ref.Name().String() == "refs/heads/"+repo.Manager.Opts.Branch {
-				logOpts = git.LogOptions{
-					From: ref.Hash(),
-				}
-				return nil
-			} else if ref.Name().String() == "refs/remotes/origin/"+repo.Manager.Opts.Branch {
-				logOpts = git.LogOptions{
-					From: ref.Hash(),
-				}
-				return nil
-			}
-			return nil
-		})
-		if logOpts.From.IsZero() {
-			return nil, fmt.Errorf("could not find branch %s", repo.Manager.Opts.Branch)
-		}
-		return &logOpts, nil
-	}
-	if !logOpts.From.IsZero() || logOpts.Since != nil || logOpts.Until != nil {
-		return &logOpts, nil
-	}
-	return &git.LogOptions{All: true}, nil
-}
-
-// howLong accepts a time.Time object which is subtracted from time.Now() and
-// converted to nanoseconds which is returned
-func howLong(t time.Time) int64 {
-	return time.Now().Sub(t).Nanoseconds()
-}

+ 5 - 5
config/config.go

@@ -6,7 +6,7 @@ import (
 	"regexp"
 	"strconv"
 
-	"github.com/zricethezav/gitleaks/v4/options"
+	"github.com/zricethezav/gitleaks/v5/options"
 
 	"github.com/BurntSushi/toml"
 )
@@ -29,9 +29,9 @@ type Entropy struct {
 
 // Rule is a struct that contains information that is loaded from a gitleaks config.
 // This struct is used in the Config struct as an array of Rules and is iterated
-// over during an audit. Each rule will be checked. If a regex match is found AND
+// over during a scan. Each rule will be checked. If a regex match is found AND
 // that match is not whitelisted (globally or locally), then a leak will be appended
-// to the final audit report.
+// to the final scan report.
 type Rule struct {
 	Description   string
 	Regex         *regexp.Regexp
@@ -87,7 +87,7 @@ type TomlLoader struct {
 }
 
 // NewConfig will create a new config struct which contains
-// rules on how gitleaks will proceed with its audit.
+// rules on how gitleaks will proceed with its scan.
 // If no options are passed via cli then NewConfig will return
 // a default config which can be seen in config.go
 func NewConfig(options options.Options) (Config, error) {
@@ -115,7 +115,7 @@ func NewConfig(options options.Options) (Config, error) {
 }
 
 // Parse will parse the values set in a TomlLoader and use those values
-// to create compiled regular expressions and rules used in audits
+// to create compiled regular expressions and rules used in scans
 func (tomlLoader TomlLoader) Parse() (Config, error) {
 	var cfg Config
 	for _, rule := range tomlLoader.Rules {

+ 2 - 1
config/config_test.go

@@ -2,9 +2,10 @@ package config
 
 import (
 	"fmt"
-	"github.com/zricethezav/gitleaks/v4/options"
 	"regexp"
 	"testing"
+
+	"github.com/zricethezav/gitleaks/v5/options"
 )
 
 func TestParse(t *testing.T) {

+ 1 - 1
examples/regex_and_entropy_config.toml

@@ -1,5 +1,5 @@
 # This config contains a single rule which defines a regex and a range of entropy values. If a rule has
-# both regex and entropy then that rule uses BOTH the regex and entropy in combination when performing an audit.
+# both regex and entropy then that rule uses BOTH the regex and entropy in combination when performing a scan.
 # In other words, if a line of code has an entropy value that is within the range of the entropies defined and
 # a regex match is found then that line of code contains a leak.
 

+ 2 - 2
go.mod

@@ -1,11 +1,11 @@
-module github.com/zricethezav/gitleaks/v4
+module github.com/zricethezav/gitleaks/v5
 
 go 1.14
 
 require (
 	github.com/BurntSushi/toml v0.3.1
 	github.com/go-git/go-billy/v5 v5.0.0
-	github.com/go-git/go-git/v5 v5.0.0
+	github.com/go-git/go-git/v5 v5.1.0
 	github.com/google/go-cmp v0.4.0 // indirect
 	github.com/google/go-github/v31 v31.0.0
 	github.com/hako/durafmt v0.0.0-20191009132224-3f39dc1ed9f4

+ 4 - 0
go.sum

@@ -25,6 +25,8 @@ github.com/go-git/go-git-fixtures/v4 v4.0.1 h1:q+IFMfLx200Q3scvt2hN79JsEzy4AmBTp
 github.com/go-git/go-git-fixtures/v4 v4.0.1/go.mod h1:m+ICp2rF3jDhFgEZ/8yziagdT1C+ZpZcrJjappBCDSw=
 github.com/go-git/go-git/v5 v5.0.0 h1:k5RWPm4iJwYtfWoxIJy4wJX9ON7ihPeZZYC1fLYDnpg=
 github.com/go-git/go-git/v5 v5.0.0/go.mod h1:oYD8y9kWsGINPFJoLdaScGCN6dlKg23blmClfZwtUVA=
+github.com/go-git/go-git/v5 v5.1.0 h1:HxJn9g/E7eYvKW3Fm7Jt4ee8LXfPOm/H1cdDu8vEssk=
+github.com/go-git/go-git/v5 v5.1.0/go.mod h1:ZKfuPUoY1ZqIG4QG9BDBh3G4gLM5zvPuSJAozQrZuyM=
 github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
 github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
@@ -39,6 +41,8 @@ github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASu
 github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
 github.com/hako/durafmt v0.0.0-20191009132224-3f39dc1ed9f4 h1:60gBOooTSmNtrqNaRvrDbi8VAne0REaek2agjnITKSw=
 github.com/hako/durafmt v0.0.0-20191009132224-3f39dc1ed9f4/go.mod h1:5Scbynm8dF1XAPwIwkGPqzkM/shndPm79Jd1003hTjE=
+github.com/imdario/mergo v0.3.9 h1:UauaLniWCFHWd+Jp9oCEkTBj8VO/9DKg3PV3VCNMDIg=
+github.com/imdario/mergo v0.3.9/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
 github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A=
 github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo=
 github.com/jessevdk/go-flags v1.4.0 h1:4IU2WS7AumrZ/40jfhf4QVDMsQwqA7VEHozFRrGARJA=

+ 19 - 15
hosts/github.go

@@ -6,9 +6,9 @@ import (
 	"strings"
 	"sync"
 
-	"github.com/zricethezav/gitleaks/v4/audit"
-	"github.com/zricethezav/gitleaks/v4/manager"
-	"github.com/zricethezav/gitleaks/v4/options"
+	"github.com/zricethezav/gitleaks/v5/manager"
+	"github.com/zricethezav/gitleaks/v5/options"
+	"github.com/zricethezav/gitleaks/v5/scan"
 
 	"github.com/go-git/go-git/v5"
 	"github.com/go-git/go-git/v5/plumbing"
@@ -27,7 +27,7 @@ type Github struct {
 }
 
 // NewGithubClient accepts a manager struct and returns a Github host pointer which will be used to
-// perform a github audit on an organization, user, or PR.
+// perform a github scan on an organization, user, or PR.
 func NewGithubClient(m *manager.Manager) (*Github, error) {
 	var err error
 	ctx := context.Background()
@@ -50,8 +50,8 @@ func NewGithubClient(m *manager.Manager) (*Github, error) {
 	}, err
 }
 
-// Audit will audit a github user or organization's repos.
-func (g *Github) Audit() {
+// Scan will scan a github user or organization's repos.
+func (g *Github) Scan() {
 	ctx := context.Background()
 	listOptions := github.ListOptions{
 		PerPage: 100,
@@ -105,7 +105,7 @@ func (g *Github) Audit() {
 	}
 
 	for _, repo := range githubRepos {
-		r := audit.NewRepo(g.manager)
+		r := scan.NewRepo(g.manager)
 
 		if g.manager.CloneOptions != nil {
 			auth = g.manager.CloneOptions.Auth
@@ -119,7 +119,7 @@ func (g *Github) Audit() {
 			log.Warn("unable to clone via https and access token, attempting with ssh now")
 			auth, err := options.SSHAuth(g.manager.Opts)
 			if err != nil {
-				log.Warnf("unable to get ssh auth, skipping clone and audit for repo %s: %+v\n", *repo.CloneURL, err)
+				log.Warnf("unable to get ssh auth, skipping clone and scan for repo %s: %+v\n", *repo.CloneURL, err)
 				continue
 			}
 			err = r.Clone(&git.CloneOptions{
@@ -127,26 +127,26 @@ func (g *Github) Audit() {
 				Auth: auth,
 			})
 			if err != nil {
-				log.Warnf("err cloning %s, skipping clone and audit: %+v\n", *repo.SSHURL, err)
+				log.Warnf("err cloning %s, skipping clone and scan: %+v\n", *repo.SSHURL, err)
 				continue
 			}
 		}
-		if err = r.Audit(); err != nil {
+		if err = r.Scan(); err != nil {
 			log.Warn(err)
 		}
 	}
 }
 
-// AuditPR audits a single github PR
-func (g *Github) AuditPR() {
+// ScanPR scans a single github PR
+func (g *Github) ScanPR() {
 	ctx := context.Background()
 	splits := strings.Split(g.manager.Opts.PullRequest, "/")
 	owner := splits[len(splits)-4]
 	repoName := splits[len(splits)-3]
 	prNum, err := strconv.Atoi(splits[len(splits)-1])
-	repo := audit.NewRepo(g.manager)
+	repo := scan.NewRepo(g.manager)
 	repo.Name = repoName
-	log.Infof("auditing pr %s\n", g.manager.Opts.PullRequest)
+	log.Infof("scanning pr %s\n", g.manager.Opts.PullRequest)
 
 	if err != nil {
 		return
@@ -175,7 +175,11 @@ func (g *Github) AuditPR() {
 				if f.Patch == nil {
 					continue
 				}
-				audit.InspectFile(*f.Patch, *f.Filename, &commitObj, repo)
+				repo.CheckRules(&scan.Bundle{
+					Content: *f.Patch,
+					FilePath: *f.Filename,
+					Commit: &commitObj,
+				})
 			}
 		}
 		page = resp.NextPage

+ 10 - 10
hosts/gitlab.go

@@ -4,9 +4,9 @@ import (
 	"context"
 	"sync"
 
-	"github.com/zricethezav/gitleaks/v4/audit"
-	"github.com/zricethezav/gitleaks/v4/manager"
-	"github.com/zricethezav/gitleaks/v4/options"
+	"github.com/zricethezav/gitleaks/v5/manager"
+	"github.com/zricethezav/gitleaks/v5/options"
+	"github.com/zricethezav/gitleaks/v5/scan"
 
 	log "github.com/sirupsen/logrus"
 	"github.com/xanzy/go-gitlab"
@@ -21,7 +21,7 @@ type Gitlab struct {
 }
 
 // NewGitlabClient accepts a manager struct and returns a Gitlab host pointer which will be used to
-// perform a gitlab audit on an group or user.
+// perform a gitlab scan on a group or user.
 func NewGitlabClient(m *manager.Manager) (*Gitlab, error) {
 	var err error
 
@@ -38,8 +38,8 @@ func NewGitlabClient(m *manager.Manager) (*Gitlab, error) {
 	return gitlabClient, err
 }
 
-// Audit will audit a github user or organization's repos.
-func (g *Gitlab) Audit() {
+// Scan will scan a gitlab user or group's repos.
+func (g *Gitlab) Scan() {
 	var (
 		projects []*gitlab.Project
 		resp     *gitlab.Response
@@ -89,20 +89,20 @@ func (g *Gitlab) Audit() {
 
 	// iterate of gitlab projects
 	for _, p := range projects {
-		r := audit.NewRepo(g.manager)
+		r := scan.NewRepo(g.manager)
 		cloneOpts := g.manager.CloneOptions
 		cloneOpts.URL = p.HTTPURLToRepo
 		err := r.Clone(cloneOpts)
 		// TODO handle clone retry with ssh like github host
 		r.Name = p.Name
 
-		if err = r.Audit(); err != nil {
+		if err = r.Scan(); err != nil {
 			log.Error(err)
 		}
 	}
 }
 
-// AuditPR TODO not implemented
-func (g *Gitlab) AuditPR() {
+// ScanPR TODO not implemented
+func (g *Gitlab) ScanPR() {
 	log.Error("AuditPR is not implemented in Gitlab host yet...")
 }

+ 11 - 6
hosts/host.go

@@ -1,8 +1,9 @@
 package hosts
 
 import (
-	"github.com/zricethezav/gitleaks/v4/manager"
 	"strings"
+
+	"github.com/zricethezav/gitleaks/v5/manager"
 )
 
 const (
@@ -13,11 +14,11 @@ const (
 // Host is an interface used for defining external git hosting providers like github and gitlab.
 // TODO add bitbucket
 type Host interface {
-	Audit()
-	AuditPR()
+	Scan()
+	ScanPR()
 }
 
-// Run kicks off a host audit. This function accepts a manager and determines what host it should audit
+// Run kicks off a host scan. This function accepts a manager and determines what host it should scan
 func Run(m *manager.Manager) error {
 	var host Host
 	var err error
@@ -30,10 +31,14 @@ func Run(m *manager.Manager) error {
 		return nil
 	}
 
+	if err != nil {
+		return err
+	}
+
 	if m.Opts.PullRequest != "" {
-		host.AuditPR()
+		host.ScanPR()
 	} else {
-		host.Audit()
+		host.Scan()
 	}
 	return err
 }

+ 4 - 3
hosts/hosts_test.go

@@ -3,11 +3,12 @@ package hosts
 import (
 	"flag"
 	"fmt"
-	"github.com/zricethezav/gitleaks/v4/config"
-	"github.com/zricethezav/gitleaks/v4/manager"
-	"github.com/zricethezav/gitleaks/v4/options"
 	"os"
 	"testing"
+
+	"github.com/zricethezav/gitleaks/v5/config"
+	"github.com/zricethezav/gitleaks/v5/manager"
+	"github.com/zricethezav/gitleaks/v5/options"
 )
 
 var (

+ 12 - 12
main.go

@@ -5,11 +5,11 @@ import (
 	"os"
 	"time"
 
-	"github.com/zricethezav/gitleaks/v4/audit"
-	"github.com/zricethezav/gitleaks/v4/config"
-	"github.com/zricethezav/gitleaks/v4/hosts"
-	"github.com/zricethezav/gitleaks/v4/manager"
-	"github.com/zricethezav/gitleaks/v4/options"
+	"github.com/zricethezav/gitleaks/v5/config"
+	"github.com/zricethezav/gitleaks/v5/hosts"
+	"github.com/zricethezav/gitleaks/v5/manager"
+	"github.com/zricethezav/gitleaks/v5/options"
+	"github.com/zricethezav/gitleaks/v5/scan"
 
 	"github.com/hako/durafmt"
 	log "github.com/sirupsen/logrus"
@@ -53,7 +53,7 @@ func main() {
 		if m.Opts.CheckUncommitted() {
 			log.Warnf("%d leaks detected in staged changes", len(leaks))
 		} else {
-			log.Warnf("%d leaks detected. %d commits audited in %s", len(leaks),
+			log.Warnf("%d leaks detected. %d commits scanned in %s", len(leaks),
 				metadata.Commits, durafmt.Parse(time.Duration(metadata.AuditTime)*time.Nanosecond))
 		}
 		os.Exit(options.LeaksPresent)
@@ -61,17 +61,17 @@ func main() {
 		if m.Opts.CheckUncommitted() {
 			log.Infof("No leaks detected in staged changes")
 		} else {
-			log.Infof("No leaks detected. %d commits audited in %s",
+			log.Infof("No leaks detected. %d commits scanned in %s",
 				metadata.Commits, durafmt.Parse(time.Duration(metadata.AuditTime)*time.Nanosecond))
 		}
 		os.Exit(options.Success)
 	}
 }
 
-// Run begins the program and contains some basic logic on how to continue with the audit. If any external git host
-// options are set (like auditing a gitlab or github user) then a specific host client will be created and
-// then Audit() and Report() will be called. Otherwise, gitleaks will create a new repo and an audit will proceed.
-// If no options or the uncommitted option is set then a pre-commit audit will
+// Run begins the program and contains some basic logic on how to continue with the scan. If any external git host
+// options are set (like scanning a gitlab or github user) then a specific host client will be created and
+// then Scan() and Report() will be called. Otherwise, gitleaks will create a new repo and a scan will proceed.
+// If no options or the uncommitted option is set then a pre-commit scan will
 // take place -- this is similar to running `git diff` on all the tracked files.
 func Run(m *manager.Manager) error {
 	if m.Opts.Disk {
@@ -87,7 +87,7 @@ func Run(m *manager.Manager) error {
 	if m.Opts.Host != "" {
 		err = hosts.Run(m)
 	} else {
-		err = audit.Run(m)
+		err = scan.Run(m)
 	}
 	if err != nil {
 		return err

+ 12 - 10
manager/manager.go

@@ -14,8 +14,8 @@ import (
 	"text/tabwriter"
 	"time"
 
-	"github.com/zricethezav/gitleaks/v4/config"
-	"github.com/zricethezav/gitleaks/v4/options"
+	"github.com/zricethezav/gitleaks/v5/config"
+	"github.com/zricethezav/gitleaks/v5/options"
 
 	"github.com/go-git/go-git/v5"
 	"github.com/hako/durafmt"
@@ -48,6 +48,7 @@ type Manager struct {
 // sensitive information as determined by the rules set in a gitleaks config
 type Leak struct {
 	Line       string    `json:"line"`
+	LineNumber int       `json:"lineNumber"`
 	Offender   string    `json:"offender"`
 	Commit     string    `json:"commit"`
 	Repo       string    `json:"repo"`
@@ -58,13 +59,14 @@ type Leak struct {
 	File       string    `json:"file"`
 	Date       time.Time `json:"date"`
 	Tags       string    `json:"tags"`
+	Operation  string    `json:"operation"`
 	lookupHash string
 }
 
-// AuditTime is a type used to determine total audit time
+// AuditTime is a type used to determine total scan time
 type AuditTime int64
 
-// PatchTime is a type used to determine total patch time during an audit
+// PatchTime is a type used to determine total patch time during a scan
 type PatchTime int64
 
 // CloneTime is a type used to determine total clone time
@@ -78,7 +80,7 @@ type RegexTime struct {
 	Regex string
 }
 
-// Metadata is a struct used to communicate metadata about an audit like timings and total commit counts.
+// Metadata is a struct used to communicate metadata about a scan like timings and total commit counts.
 type Metadata struct {
 	mux  *sync.Mutex
 	data map[string]interface{}
@@ -147,7 +149,7 @@ func (manager *Manager) GetLeaks() []Leak {
 	return manager.leaks
 }
 
-// SendLeaks accepts a leak and is used by the audit pkg. This is the public function
+// SendLeaks accepts a leak and is used by the scan pkg. This is the public function
 // that allows other packages to send leaks to the manager.
 func (manager *Manager) SendLeaks(l Leak) {
 	if len(l.Line) > maxLineLen {
@@ -157,7 +159,7 @@ func (manager *Manager) SendLeaks(l Leak) {
 		l.Offender = l.Offender[0:maxLineLen-1] + "..."
 	}
 	h := sha1.New()
-	h.Write([]byte(l.Commit + l.Offender + l.File + l.Line))
+	h.Write([]byte(l.Commit + l.Offender + l.File + l.Line + string(l.LineNumber)))
 	l.lookupHash = hex.EncodeToString(h.Sum(nil))
 	if manager.Opts.Redact {
 		l.Line = strings.ReplaceAll(l.Line, l.Offender, "REDACTED")
@@ -223,7 +225,7 @@ func (manager *Manager) receiveMetadata() {
 	}
 }
 
-// IncrementCommits increments total commits during an audit by i.
+// IncrementCommits increments total commits during a scan by i.
 func (manager *Manager) IncrementCommits(i int) {
 	manager.metadata.mux.Lock()
 	manager.metadata.Commits += i
@@ -236,7 +238,7 @@ func (manager *Manager) RecordTime(t interface{}) {
 	manager.metadata.timings <- t
 }
 
-// DebugOutput logs metadata and other messages that occurred during a gitleaks audit
+// DebugOutput logs metadata and other messages that occurred during a gitleaks scan
 func (manager *Manager) DebugOutput() {
 	log.Debugf("-------------------------\n")
 	log.Debugf("| Times and Commit Counts|\n")
@@ -308,6 +310,6 @@ func (manager *Manager) receiveInterrupt() {
 			log.Error(err)
 		}
 	}
-	log.Info("gitleaks received interrupt, stopping audit")
+	log.Info("gitleaks received interrupt, stopping scan")
 	os.Exit(options.ErrorEncountered)
 }

+ 8 - 8
manager/manager_test.go

@@ -3,8 +3,8 @@ package manager
 import (
 	"crypto/rand"
 	"fmt"
-	"github.com/zricethezav/gitleaks/v4/config"
-	"github.com/zricethezav/gitleaks/v4/options"
+	"github.com/zricethezav/gitleaks/v5/config"
+	"github.com/zricethezav/gitleaks/v5/options"
 	"io"
 	"testing"
 )
@@ -45,14 +45,14 @@ func TestSendReceiveLeaks(t *testing.T) {
 
 func TestSendReceiveMeta(t *testing.T) {
 	tests := []struct {
-		auditTime  int64
+		scanTime  int64
 		patchTime  int64
 		cloneTime  int64
 		regexTime  int64
 		iterations int
 	}{
 		{
-			auditTime:  1000,
+			scanTime:  1000,
 			patchTime:  1000,
 			cloneTime:  1000,
 			regexTime:  1000,
@@ -65,7 +65,7 @@ func TestSendReceiveMeta(t *testing.T) {
 		m, _ := NewManager(opts, cfg)
 
 		for i := 0; i < test.iterations; i++ {
-			m.RecordTime(AuditTime(test.auditTime))
+			m.RecordTime(AuditTime(test.scanTime))
 			m.RecordTime(PatchTime(test.patchTime))
 			m.RecordTime(CloneTime(test.cloneTime))
 			m.RecordTime(RegexTime{
@@ -82,9 +82,9 @@ func TestSendReceiveMeta(t *testing.T) {
 			t.Errorf("clone time mismatch, got %d, wanted %d",
 				md.cloneTime, test.cloneTime*int64(test.iterations))
 		}
-		if md.AuditTime != test.auditTime*int64(test.iterations) {
-			t.Errorf("audit time mismatch, got %d, wanted %d",
-				md.AuditTime, test.auditTime*int64(test.iterations))
+		if md.AuditTime != test.scanTime*int64(test.iterations) {
+			t.Errorf("scan time mismatch, got %d, wanted %d",
+				md.AuditTime, test.scanTime*int64(test.iterations))
 		}
 		if md.patchTime != test.patchTime*int64(test.iterations) {
 			t.Errorf("clone time mismatch, got %d, wanted %d",

+ 15 - 15
options/options.go

@@ -7,7 +7,7 @@ import (
 	"os/user"
 	"strings"
 
-	"github.com/zricethezav/gitleaks/v4/version"
+	"github.com/zricethezav/gitleaks/v5/version"
 
 	"github.com/go-git/go-git/v5"
 	"github.com/go-git/go-git/v5/plumbing/transport/http"
@@ -28,7 +28,7 @@ const (
 
 // Options stores values of command line options
 type Options struct {
-	Verbose       bool   `short:"v" long:"verbose" description:"Show verbose output from audit"`
+	Verbose       bool   `short:"v" long:"verbose" description:"Show verbose output from scan"`
 	Repo          string `short:"r" long:"repo" description:"Target repository"`
 	Config        string `long:"config" description:"config path"`
 	Disk          bool   `long:"disk" description:"Clones repo(s) to disk"`
@@ -36,35 +36,35 @@ type Options struct {
 	Username      string `long:"username" description:"Username for git repo"`
 	Password      string `long:"password" description:"Password for git repo"`
 	AccessToken   string `long:"access-token" description:"Access token for git repo"`
-	Commit        string `long:"commit" description:"sha of commit to audit or \"latest\" to scan the last commit of the repository"`
-	FilesAtCommit string `long:"files-at-commit" description:"sha of commit to audit all files at commit"`
+	Commit        string `long:"commit" description:"sha of commit to scan or \"latest\" to scan the last commit of the repository"`
+	FilesAtCommit string `long:"files-at-commit" description:"sha of commit to scan all files at commit"`
 	Threads       int    `long:"threads" description:"Maximum number of threads gitleaks spawns"`
 	SSH           string `long:"ssh-key" description:"path to ssh key used for auth"`
 	Uncommited    bool   `long:"uncommitted" description:"run gitleaks on uncommitted code"`
 	RepoPath      string `long:"repo-path" description:"Path to repo"`
 	OwnerPath     string `long:"owner-path" description:"Path to owner directory (repos discovered)"`
-	Branch        string `long:"branch" description:"Branch to audit"`
+	Branch        string `long:"branch" description:"Branch to scan"`
 	Report        string `long:"report" description:"path to write json leaks file"`
 	ReportFormat  string `long:"report-format" default:"json" description:"json or csv"`
 	Redact        bool   `long:"redact" description:"redact secrets from log messages and leaks"`
 	Debug         bool   `long:"debug" description:"log debug messages"`
 	RepoConfig    bool   `long:"repo-config" description:"Load config from target repo. Config file must be \".gitleaks.toml\" or \"gitleaks.toml\""`
 	PrettyPrint   bool   `long:"pretty" description:"Pretty print json if leaks are present"`
-	CommitFrom    string `long:"commit-from" description:"Commit to start audit from"`
-	CommitTo      string `long:"commit-to" description:"Commit to stop audit"`
-	CommitSince   string `long:"commit-since" description:"Audit commits more recent than a specific date. Ex: '2006-01-02' or '2006-01-02T15:04:05-0700' format."`
-	CommitUntil   string `long:"commit-until" description:"Audit commits older than a specific date. Ex: '2006-01-02' or '2006-01-02T15:04:05-0700' format."`
-	Timeout       string `long:"timeout" description:"Time allowed per audit. Ex: 10us, 30s, 1m, 1h10m1s"`
-	Depth         int    `long:"depth" description:"Number of commits to audit"`
+	CommitFrom    string `long:"commit-from" description:"Commit to start scan from"`
+	CommitTo      string `long:"commit-to" description:"Commit to stop scan"`
+	CommitSince   string `long:"commit-since" description:"Scan commits more recent than a specific date. Ex: '2006-01-02' or '2006-01-02T15:04:05-0700' format."`
+	CommitUntil   string `long:"commit-until" description:"Scan commits older than a specific date. Ex: '2006-01-02' or '2006-01-02T15:04:05-0700' format."`
+	Timeout       string `long:"timeout" description:"Time allowed per scan. Ex: 10us, 30s, 1m, 1h10m1s"`
+	Depth         int    `long:"depth" description:"Number of commits to scan"`
 	Deletion      bool   `long:"include-deletion" description:"Scan for patch deletions in addition to patch additions"`
 
 	// Hosts
 	Host         string `long:"host" description:"git hosting service like gitlab or github. Supported hosts include: Github, Gitlab"`
 	BaseURL      string `long:"baseurl" description:"Base URL for API requests. Defaults to the public GitLab or GitHub API, but can be set to a domain endpoint to use with a self hosted server."`
-	Organization string `long:"org" description:"organization to audit"`
-	User         string `long:"user" description:"user to audit"`
+	Organization string `long:"org" description:"organization to scan"`
+	User         string `long:"user" description:"user to scan"`
 	PullRequest  string `long:"pr" description:"pull/merge request url"`
-	ExcludeForks bool   `long:"exclude-forks" description:"audit excludes forks"`
+	ExcludeForks bool   `long:"exclude-forks" description:"scan excludes forks"`
 }
 
 // ParseOptions is responsible for parsing options passed in by cli. An Options struct
@@ -242,7 +242,7 @@ func (opts Options) CheckUncommitted() bool {
 }
 
 // GetAccessToken accepts options and returns a string which is the access token to a git host.
-// Setting this option or environment var is necessary if performing an audit with any of the git hosting providers
+// Setting this option or environment var is necessary if performing a scan with any of the git hosting providers
 // in the host pkg. The access token set by cli options takes precedence over env vars.
 func GetAccessToken(opts Options) string {
 	if opts.AccessToken != "" {

+ 296 - 0
scan/repo.go

@@ -0,0 +1,296 @@
+package scan
+
+import (
+	"context"
+	"crypto/md5"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path"
+	"path/filepath"
+	"runtime"
+	"time"
+
+	"github.com/zricethezav/gitleaks/v5/config"
+	"github.com/zricethezav/gitleaks/v5/manager"
+
+	"github.com/BurntSushi/toml"
+	"github.com/go-git/go-billy/v5"
+	"github.com/go-git/go-git/v5"
+	"github.com/go-git/go-git/v5/plumbing"
+	"github.com/go-git/go-git/v5/storage/memory"
+	log "github.com/sirupsen/logrus"
+)
+
+// Repo wraps a *git.Repository object in addition to a manager object and the name of the repo.
+// Commits are inspected from the *git.Repository object. Presumably any leak found is handed to the
+// manager, which keeps track of all leaks (see Manager.SendLeaks) -- TODO confirm the exact channel.
+type Repo struct {
+	*git.Repository
+
+	// config is initialized from the manager's config in NewRepo; it matters when --repo-config is set.
+	// This allows users to load up configs specific to their repos.
+	// Imagine the scenario where you are doing a scan of a large organization
+	// and you want certain repos to look for specific rules. If those specific repos
+	// have a gitleaks.toml or .gitleaks.toml config then those configs will be used specifically
+	// for those repo scans.
+	config config.Config
+
+	// ctx is used to signal timeouts to running goroutines; cancel is its CancelFunc (see setupTimeout).
+	ctx    context.Context
+	cancel context.CancelFunc
+
+	Name    string           // repo name; assigned by Clone, Open and runHelper
+	Manager *manager.Manager // manager supplying the options and config for this scan
+}
+
+// NewRepo builds a Repo bound to the given manager. The repo starts with the
+// manager's config and a background context (no timeout); the underlying
+// *git.Repository is left unset until Clone or Open is called.
+func NewRepo(m *manager.Manager) *Repo {
+	r := new(Repo)
+	r.Manager = m
+	r.config = m.Config
+	r.ctx = context.Background()
+	return r
+}
+
+// Run accepts a manager and begins a scan based on the options/configs set in the manager.
+//
+// When Opts.OwnerPath is set, every sub-directory of that path is scanned as its own
+// repo: Opts.RepoPath is overwritten with each candidate in turn, and a failure in one
+// directory is logged together with its cause and skipped so the remaining directories
+// are still scanned. In that mode Run only returns an error if the owner directory
+// itself cannot be read. Otherwise a single repo is scanned and its error is returned.
+func Run(m *manager.Manager) error {
+	if m.Opts.OwnerPath == "" {
+		return runHelper(NewRepo(m))
+	}
+
+	files, err := ioutil.ReadDir(m.Opts.OwnerPath)
+	if err != nil {
+		return err
+	}
+	for _, f := range files {
+		if !f.IsDir() {
+			continue
+		}
+		// Deliberately joined with "/": runHelper derives the repo name from this
+		// value with path.Base, which only understands forward slashes.
+		m.Opts.RepoPath = fmt.Sprintf("%s/%s", m.Opts.OwnerPath, f.Name())
+		if err := runHelper(NewRepo(m)); err != nil {
+			// runHelper can fail for reasons other than "not a git repo" (for
+			// example an error during the scan itself), so report the real cause.
+			log.Warnf("skipping %s: %v", f.Name(), err)
+		}
+	}
+	return nil
+}
+
+// runHelper scans the single repo described by r.Manager.Opts.
+// A repo whose RepoPath or Repo option matches a whitelist entry is skipped
+// without error. Local repos are opened (and, when the uncommitted check is
+// requested, only their uncommitted changes are scanned); anything else is
+// cloned first and then scanned.
+func runHelper(r *Repo) error {
+	for _, wlRepo := range r.Manager.Config.Whitelist.Repos {
+		if RegexMatched(r.Manager.Opts.RepoPath, wlRepo) || RegexMatched(r.Manager.Opts.Repo, wlRepo) {
+			return nil
+		}
+	}
+
+	// Remote target: clone, then scan.
+	if !r.Manager.Opts.OpenLocal() {
+		if err := r.Clone(nil); err != nil {
+			return err
+		}
+		return r.Scan()
+	}
+
+	// Local target: open the repo from disk.
+	r.Name = path.Base(r.Manager.Opts.RepoPath)
+	if err := r.Open(); err != nil {
+		return err
+	}
+
+	// Scanning uncommitted files is the default behavior for a bare
+	// "$ gitleaks" command with no options set.
+	if r.Manager.Opts.CheckUncommitted() {
+		return r.scanUncommitted()
+	}
+	return r.Scan()
+}
+
+// Clone clones a repo and stores the resulting go-git repository on repo.
+//
+// cloneOption selects what to clone; when it is nil the manager's CloneOptions
+// are used instead, and an error is returned if those are nil as well. The clone
+// goes to a fresh sub-directory of Manager.CloneDir when that is set, otherwise it
+// is kept in memory. On success repo.Name is set to the base name of Opts.Repo and
+// the elapsed time is recorded with the manager as a CloneTime.
+func (repo *Repo) Clone(cloneOption *git.CloneOptions) error {
+	if cloneOption == nil {
+		cloneOption = repo.Manager.CloneOptions
+	}
+	if cloneOption == nil {
+		// Without this guard the log line below would dereference a nil pointer.
+		return fmt.Errorf("no clone options available for repo %q", repo.Manager.Opts.Repo)
+	}
+
+	log.Infof("cloning... %s", cloneOption.URL)
+	start := time.Now()
+
+	var (
+		repository *git.Repository
+		err        error
+	)
+	if repo.Manager.CloneDir != "" {
+		// The directory name is a hash of the current time so that successive
+		// clones do not collide inside CloneDir.
+		dirName := fmt.Sprintf("%x", md5.Sum([]byte(time.Now().String())))
+		repository, err = git.PlainClone(filepath.Join(repo.Manager.CloneDir, dirName), false, cloneOption)
+	} else {
+		repository, err = git.Clone(memory.NewStorage(), nil, cloneOption)
+	}
+	if err != nil {
+		return err
+	}
+	repo.Name = filepath.Base(repo.Manager.Opts.Repo)
+	repo.Repository = repository
+	repo.Manager.RecordTime(manager.CloneTime(howLong(start)))
+
+	return nil
+}
+
+// howManyThreads clamps the requested thread count to the range 1..GOMAXPROCS
+// and returns the number of goroutines that will spawn during gitleaks execution.
+// Zero or negative requests yield 1; requests above GOMAXPROCS are lowered to
+// GOMAXPROCS with a warning.
+func howManyThreads(threads int) int {
+	maxThreads := runtime.GOMAXPROCS(0)
+	switch {
+	case threads <= 0:
+		// Negative values used to be returned unchanged, contradicting the
+		// documented 1..GOMAXPROCS range.
+		return 1
+	case threads > maxThreads:
+		log.Warnf("%d threads set too high, setting to system max, %d", threads, maxThreads)
+		return maxThreads
+	default:
+		return threads
+	}
+}
+
+// getLogOptions determines what log options are used when iterating through commits.
+// It is similar to `git log {branch}`. Default behavior is to log ALL branches so
+// gitleaks gets the full git history.
+//
+// The options are derived from the manager's Opts:
+//   - CommitFrom sets the starting hash.
+//   - CommitSince / CommitUntil are parsed first as "2006-01-02T15:04:05-0700" and
+//     then as "2006-01-02"; if neither layout matches, the parse error is returned.
+//   - Branch, when set, replaces all of the above: the log starts at the hash of
+//     refs/heads/<branch> or refs/remotes/origin/<branch> (the last matching
+//     reference visited wins), and an error is returned if no such ref exists.
+func getLogOptions(repo *Repo) (*git.LogOptions, error) {
+	var logOpts git.LogOptions
+	const dateformat string = "2006-01-02"
+	const timeformat string = "2006-01-02T15:04:05-0700"
+	if repo.Manager.Opts.CommitFrom != "" {
+		logOpts.From = plumbing.NewHash(repo.Manager.Opts.CommitFrom)
+	}
+	if repo.Manager.Opts.CommitSince != "" {
+		if t, err := time.Parse(timeformat, repo.Manager.Opts.CommitSince); err == nil {
+			logOpts.Since = &t
+		} else if t, err := time.Parse(dateformat, repo.Manager.Opts.CommitSince); err == nil {
+			logOpts.Since = &t
+		} else {
+			return nil, err
+		}
+	}
+	if repo.Manager.Opts.CommitUntil != "" {
+		if t, err := time.Parse(timeformat, repo.Manager.Opts.CommitUntil); err == nil {
+			logOpts.Until = &t
+		} else if t, err := time.Parse(dateformat, repo.Manager.Opts.CommitUntil); err == nil {
+			logOpts.Until = &t
+		} else {
+			return nil, err
+		}
+	}
+	if repo.Manager.Opts.Branch != "" {
+		refs, err := repo.Storer.IterReferences()
+		if err != nil {
+			return nil, err
+		}
+		branch := repo.Manager.Opts.Branch
+		err = refs.ForEach(func(ref *plumbing.Reference) error {
+			if ref.Name().IsTag() {
+				return nil
+			}
+			switch ref.Name().String() {
+			case "refs/heads/" + branch, "refs/remotes/origin/" + branch:
+				// A branch match discards any from/since/until set above.
+				logOpts = git.LogOptions{
+					From: ref.Hash(),
+				}
+			}
+			return nil
+		})
+		// The iteration error used to be assigned and never inspected.
+		if err != nil {
+			return nil, err
+		}
+		if logOpts.From.IsZero() {
+			return nil, fmt.Errorf("could not find branch %s", repo.Manager.Opts.Branch)
+		}
+		return &logOpts, nil
+	}
+	if !logOpts.From.IsZero() || logOpts.Since != nil || logOpts.Until != nil {
+		return &logOpts, nil
+	}
+	return &git.LogOptions{All: true}, nil
+}
+
+// howLong returns the time elapsed since t, in nanoseconds. The result is
+// negative when t lies in the future.
+func howLong(t time.Time) int64 {
+	return time.Since(t).Nanoseconds()
+}
+
+// Open opens a local repo either from repo-path or $PWD and stores it on repo.
+// When Opts.RepoPath is empty the current working directory is opened and
+// repo.Name is set to that directory's base name; when RepoPath is set, Name is
+// left untouched. Any error from os.Getwd or git.PlainOpen is returned as-is.
+func (repo *Repo) Open() error {
+	if repo.Manager.Opts.RepoPath != "" {
+		// open git repo from repo path
+		repository, err := git.PlainOpen(repo.Manager.Opts.RepoPath)
+		if err != nil {
+			return err
+		}
+		repo.Repository = repository
+		return nil
+	}
+
+	// open git repo from PWD
+	dir, err := os.Getwd()
+	if err != nil {
+		return err
+	}
+	repository, err := git.PlainOpen(dir)
+	if err != nil {
+		return err
+	}
+	repo.Repository = repository
+	// os.Getwd returns an OS-native path, so use filepath.Base: path.Base only
+	// splits on forward slashes and returns the whole path on Windows.
+	repo.Name = filepath.Base(dir)
+	return nil
+}
+
+// loadRepoConfig reads a gitleaks config from the root of the repo's worktree.
+// It looks for ".gitleaks.toml" first and falls back to "gitleaks.toml". The file
+// is decoded as TOML and parsed into a config.Config; an error is returned when
+// the worktree is unavailable, neither file can be opened, or the TOML is invalid.
+func (repo *Repo) loadRepoConfig() (config.Config, error) {
+	wt, err := repo.Repository.Worktree()
+	if err != nil {
+		return config.Config{}, err
+	}
+	var f billy.File
+	// A failure to open the dot-file is not fatal; the plain name is tried next.
+	f, _ = wt.Filesystem.Open(".gitleaks.toml")
+	if f == nil {
+		f, err = wt.Filesystem.Open("gitleaks.toml")
+		if err != nil {
+			return config.Config{}, fmt.Errorf("problem loading repo config: %v", err)
+		}
+	}
+	defer f.Close()
+	var tomlLoader config.TomlLoader
+	// The decode error used to be dropped, so a malformed config was parsed as
+	// an empty one instead of being reported.
+	if _, err = toml.DecodeReader(f, &tomlLoader); err != nil {
+		return config.Config{}, fmt.Errorf("problem decoding repo config: %w", err)
+	}
+	return tomlLoader.Parse()
+}
+
+// timeoutReached reports whether the repo's context has ended because its
+// deadline passed. Call it at the top of loops and before potentially long
+// running goroutines (like checking inefficient regexes).
+func (repo *Repo) timeoutReached() bool {
+	return repo.ctx.Err() == context.DeadlineExceeded
+}
+
+// setupTimeout parses the --timeout option and, when one is given, replaces the
+// repo's ctx/cancel with a context that expires after that duration. A goroutine
+// waits for the context to finish and logs a warning if it ended because the
+// deadline was exceeded. An empty option is a no-op; an unparsable duration is
+// returned as an error.
+func (repo *Repo) setupTimeout() error {
+	raw := repo.Manager.Opts.Timeout
+	if raw == "" {
+		return nil
+	}
+
+	timeout, err := time.ParseDuration(raw)
+	if err != nil {
+		return err
+	}
+
+	repo.ctx, repo.cancel = context.WithTimeout(context.Background(), timeout)
+
+	go func() {
+		<-repo.ctx.Done()
+		if repo.timeoutReached() {
+			log.Warnf("Timeout deadline (%s) exceeded for %s", timeout.String(), repo.Name)
+		}
+	}()
+	return nil
+}

+ 396 - 0
scan/rule.go

@@ -0,0 +1,396 @@
+package scan
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"math"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/zricethezav/gitleaks/v5/config"
+	"github.com/zricethezav/gitleaks/v5/manager"
+
+	fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
+	"github.com/go-git/go-git/v5/plumbing/object"
+	log "github.com/sirupsen/logrus"
+)
+
+// Markers used when walking the textual form of a patch to recover line numbers.
+const (
+	// diffAddPrefix starts every added line in a patch.
+	diffAddPrefix = "+"
+	// diffAddFilePrefix starts the patch line that names the new side of a file.
+	diffAddFilePrefix = "+++ b"
+	// diffAddFilePrefixSlash is split on to obtain the file path from that line.
+	diffAddFilePrefixSlash = "+++ b/"
+	// diffLineSignature is split on to isolate the line range in the line that follows.
+	diffLineSignature = " @@"
+	// defaultLineNumber is the line number given to a leak before any lookup is attempted.
+	defaultLineNumber = -1
+)
+
+// CheckRules accepts bundle and checks each rule defined in the config against the bundle's content.
+// Files and paths matched by the global whitelist are skipped entirely. A rule without a content
+// regex reports a filename/path leak; every other rule reports one leak per regex match that is
+// neither whitelisted nor rejected by the rule's entropy ranges. The time spent on each rule that
+// was not skipped is recorded with the manager.
+func (repo *Repo) CheckRules(bundle *Bundle) {
+	filename := filepath.Base(bundle.FilePath)
+	path := filepath.Dir(bundle.FilePath)
+
+	// Line-number bookkeeping starts fresh for every bundle that is checked.
+	bundle.lineLookup = make(map[string]bool)
+
+	// We want to check if there is a whitelist for this file
+	for _, reFileName := range repo.config.Whitelist.Files {
+		if RegexMatched(filename, reFileName) {
+			log.Debugf("whitelisted file found, skipping scan of file: %s", filename)
+			return
+		}
+	}
+
+	// We want to check if there is a whitelist for this path
+	for _, reFilePath := range repo.config.Whitelist.Paths {
+		if RegexMatched(path, reFilePath) {
+			log.Debugf("file in whitelisted path found, skipping scan of file: %s", filename)
+			return
+		}
+	}
+
+	for _, rule := range repo.config.Rules {
+		start := time.Now()
+
+		// For each rule we want to check filename whitelists
+		if isFileNameWhiteListed(filename, rule.Whitelist) || isFilePathWhiteListed(path, rule.Whitelist) {
+			continue
+		}
+
+		// If it has fileNameRegex and it doesnt match we continue to next rule
+		if ruleContainFileNameRegex(rule) && !RegexMatched(filename, rule.FileNameRegex) {
+			continue
+		}
+
+		// If it has filePathRegex and it doesnt match we continue to next rule
+		if ruleContainFilePathRegex(rule) && !RegexMatched(path, rule.FilePathRegex) {
+			continue
+		}
+
+		// If it doesnt contain a Content regex then it is a filename regex match
+		if !ruleContainRegex(rule) {
+			repo.Manager.SendLeaks(manager.Leak{
+				LineNumber: defaultLineNumber,
+				Line:       "N/A",
+				Offender:   "Filename/path offender: " + filename,
+				Commit:     bundle.Commit.Hash.String(),
+				Repo:       repo.Name,
+				Message:    bundle.Commit.Message,
+				Rule:       rule.Description,
+				Author:     bundle.Commit.Author.Name,
+				Email:      bundle.Commit.Author.Email,
+				Date:       bundle.Commit.Author.When,
+				Tags:       strings.Join(rule.Tags, ", "),
+				File:       filename,
+				Operation:  diffOpToString(bundle.Operation),
+			})
+		} else {
+			//otherwise we check if it matches Content regex
+			locs := rule.Regex.FindAllIndex([]byte(bundle.Content), -1)
+			for _, loc := range locs {
+				// Widen the match to the whole line(s) containing it. lineStart stops right
+				// after the previous newline and lineEnd stops at the next newline or at the
+				// end of the content, so the last character of a final line that has no
+				// trailing newline is kept and no index ever leaves the content.
+				lineStart := loc[0]
+				lineEnd := loc[1]
+				for lineStart > 0 && bundle.Content[lineStart-1] != '\n' {
+					lineStart--
+				}
+				for lineEnd < len(bundle.Content) && bundle.Content[lineEnd] != '\n' {
+					lineEnd++
+				}
+
+				line := bundle.Content[lineStart:lineEnd]
+				offender := bundle.Content[loc[0]:loc[1]]
+				groups := rule.Regex.FindStringSubmatch(offender)
+
+				if isOffenderWhiteListed(offender, rule.Whitelist) {
+					continue
+				}
+
+				if len(rule.Entropies) != 0 && !trippedEntropy(groups, rule) {
+					continue
+				}
+
+				leak := manager.Leak{
+					LineNumber: defaultLineNumber,
+					Line:       line,
+					Offender:   offender,
+					Commit:     bundle.Commit.Hash.String(),
+					Repo:       repo.Name,
+					Message:    bundle.Commit.Message,
+					Rule:       rule.Description,
+					Author:     bundle.Commit.Author.Name,
+					Email:      bundle.Commit.Author.Email,
+					Date:       bundle.Commit.Author.When,
+					Tags:       strings.Join(rule.Tags, ", "),
+					File:       bundle.FilePath,
+					Operation:  diffOpToString(bundle.Operation),
+				}
+
+				// only search for line numbers on non-deletions
+				if bundle.Operation != fdiff.Delete {
+					extractAndInjectLineNumber(&leak, bundle, repo)
+				}
+
+				repo.Manager.SendLeaks(leak)
+			}
+		}
+
+		//	TODO should return filenameRegex if only file rule
+		repo.Manager.RecordTime(manager.RegexTime{
+			Time:  howLong(start),
+			Regex: rule.Regex.String(),
+		})
+	}
+}
+
+// RegexMatched reports whether re finds a non-empty match in f. f may be a string, which is
+// matched directly, or a go-git *object.File, which is matched by its Name. A nil regex, a
+// nil f, or a value of any other type never matches.
+func RegexMatched(f interface{}, re *regexp.Regexp) bool {
+	if re == nil {
+		return false
+	}
+	switch subject := f.(type) {
+	case string:
+		return re.FindString(subject) != ""
+	case *object.File:
+		return re.FindString(subject.Name) != ""
+	default:
+		return false
+	}
+}
+
+// diffOpToString names a fdiff.Operation: "addition" for Add, "equal" for Equal and
+// "deletion" for anything else.
+func diffOpToString(operation fdiff.Operation) string {
+	if operation == fdiff.Add {
+		return "addition"
+	}
+	if operation == fdiff.Equal {
+		return "equal"
+	}
+	return "deletion"
+}
+
+// extractAndInjectLine accepts a leak, bundle, and repo which it uses to do a reverse search in order to extract
+// the line number of a historic or present leak. The function is only called when the git operation is an addition
+// or none, it does not get called when the git operation is deletion.
+func extractAndInjectLineNumber(leak *manager.Leak, bundle *Bundle, repo *Repo) {
+	var err error
+
+	switch bundle.scanType {
+	case patchScan:
+		if bundle.Patch == "" {
+			return
+		}
+		scanner := bufio.NewScanner(strings.NewReader(bundle.Patch))
+		currFile := ""
+		currLine := 0
+		currStartDiffLine := 0
+
+		for scanner.Scan() {
+			txt := scanner.Text()
+			if strings.HasPrefix(txt, diffAddFilePrefix) {
+				currStartDiffLine = 1
+				currLine = 0
+				currFile = strings.Split(txt, diffAddFilePrefixSlash)[1]
+
+				// next line contains diff line information so lets scan it here
+				scanner.Scan()
+
+				txt := scanner.Text()
+				i := strings.Index(txt, diffAddPrefix)
+				pairs := strings.Split(strings.Split(txt[i+1:], diffLineSignature)[0], ",")
+				currStartDiffLine, err = strconv.Atoi(pairs[0])
+				if err != nil {
+					log.Debug(err)
+					return
+				}
+				continue
+			} else if strings.HasPrefix(txt, diffAddPrefix) && strings.Contains(txt, leak.Line) && leak.File == currFile {
+				potentialLine := currLine + currStartDiffLine
+				if _, ok := bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, potentialLine, currFile)]; !ok {
+					bundle.lineLookup[fmt.Sprintf("%s%d%s", leak.Line, potentialLine, currFile)] = true
+					leak.LineNumber = potentialLine
+					return
+				}
+			}
+			currLine++
+		}
+	case commitScan:
+		if bundle.Commit == nil {
+			return
+		}
+		f, err := bundle.Commit.File(bundle.FilePath)
+		if err != nil {
+			log.Error(err)
+			return
+		}
+		r, err := f.Reader()
+		if err != nil {
+			log.Error(err)
+			return
+		}
+		leak.LineNumber = extractLineHelper(r, bundle, leak)
+	case uncommittedScan:
+		wt, err := repo.Worktree()
+		if err != nil {
+			log.Error(err)
+			return
+		}
+		f, err := wt.Filesystem.Open(leak.File)
+		if err != nil {
+			log.Error(err)
+			return
+		}
+		leak.LineNumber = extractLineHelper(f, bundle, leak)
+	}
+}
+
+// extractLineHelper reads r line by line and returns the 1-based number of the first line that
+// equals leak.Line and has not already been recorded in bundle.lineLookup for bundle.FilePath.
+// The returned line is recorded before returning; -1 is returned when no such line exists.
+func extractLineHelper(r io.Reader, bundle *Bundle, leak *manager.Leak) int {
+	lines := bufio.NewScanner(r)
+	for n := 1; lines.Scan(); n++ {
+		if lines.Text() != leak.Line {
+			continue
+		}
+		key := fmt.Sprintf("%s%d%s", leak.Line, n, bundle.FilePath)
+		if _, seen := bundle.lineLookup[key]; seen {
+			continue
+		}
+		bundle.lineLookup[key] = true
+		return n
+	}
+	return -1
+}
+
+// trippedEntropy reports whether any entropy range of the rule is satisfied by the capture
+// group it refers to. Ranges that point at a group index not present in groups are ignored.
+// Gitleaks does not check entropy by default; ranges come from a custom configuration.
+func trippedEntropy(groups []string, rule config.Rule) bool {
+	for _, bounds := range rule.Entropies {
+		if bounds.Group >= len(groups) {
+			continue
+		}
+		score := shannonEntropy(groups[bounds.Group])
+		inRange := score >= bounds.Min && score <= bounds.Max
+		if inRange {
+			return true
+		}
+	}
+	return false
+}
+
+// shannonEntropy calculates the entropy of data using the formula defined here:
+// https://en.wiktionary.org/wiki/Shannon_entropy
+// Another way to think about what this is doing is calculating the number of bits
+// needed to on average encode the data. So, the higher the entropy, the more random the data, the
+// more bits needed to encode that data.
+//
+// Characters are counted as runes and normalised by the number of runes, so the frequencies
+// sum to one for multi-byte input as well. The empty string has an entropy of 0.
+func shannonEntropy(data string) (entropy float64) {
+	if data == "" {
+		return 0
+	}
+
+	charCounts := make(map[rune]int)
+	total := 0
+	for _, char := range data {
+		charCounts[char]++
+		total++
+	}
+
+	// Divide by the rune count, not len(data): len counts bytes and would understate
+	// every frequency as soon as a character needs more than one byte.
+	invLength := 1.0 / float64(total)
+	for _, count := range charCounts {
+		freq := float64(count) * invLength
+		entropy -= freq * math.Log2(freq)
+	}
+
+	return entropy
+}
+
+// ruleContainRegex reports whether the rule has a content regex that is set and non-empty.
+func ruleContainRegex(rule config.Rule) bool {
+	return rule.Regex != nil && rule.Regex.String() != ""
+}
+
+// ruleContainFileNameRegex reports whether the rule has a file name regex that is set and non-empty.
+func ruleContainFileNameRegex(rule config.Rule) bool {
+	return rule.FileNameRegex != nil && rule.FileNameRegex.String() != ""
+}
+
+// ruleContainFilePathRegex reports whether the rule has a file path regex that is set and non-empty.
+func ruleContainFilePathRegex(rule config.Rule) bool {
+	return rule.FilePathRegex != nil && rule.FilePathRegex.String() != ""
+}
+
+// isCommitWhiteListed reports whether commitHash is exactly equal to one of whitelistedCommits.
+func isCommitWhiteListed(commitHash string, whitelistedCommits []string) bool {
+	for i := range whitelistedCommits {
+		if whitelistedCommits[i] == commitHash {
+			return true
+		}
+	}
+	return false
+}
+
+// isOffenderWhiteListed reports whether offender is matched by the Regex of any whitelist
+// entry. Matching goes through RegexMatched, like the file name and file path helpers, so an
+// entry without a regex is skipped instead of causing a nil pointer dereference.
+func isOffenderWhiteListed(offender string, whitelist []config.Whitelist) bool {
+	for _, wl := range whitelist {
+		if RegexMatched(offender, wl.Regex) {
+			return true
+		}
+	}
+	return false
+}
+
+// isFileNameWhiteListed reports whether filename is matched by the File regex of any whitelist entry.
+func isFileNameWhiteListed(filename string, whitelist []config.Whitelist) bool {
+	for i := range whitelist {
+		if RegexMatched(filename, whitelist[i].File) {
+			return true
+		}
+	}
+	return false
+}
+
+// isFilePathWhiteListed reports whether filepath is matched by the Path regex of any whitelist entry.
+func isFilePathWhiteListed(filepath string, whitelist []config.Whitelist) bool {
+	for i := range whitelist {
+		if RegexMatched(filepath, whitelist[i].Path) {
+			return true
+		}
+	}
+	return false
+}
+

+ 455 - 0
scan/scan.go

@@ -0,0 +1,455 @@
+package scan
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"sync"
+	"time"
+
+	"github.com/zricethezav/gitleaks/v5/manager"
+
+	"github.com/go-git/go-git/v5"
+	"github.com/go-git/go-git/v5/plumbing"
+	fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
+	"github.com/go-git/go-git/v5/plumbing/object"
+	"github.com/go-git/go-git/v5/plumbing/storer"
+	"github.com/sergi/go-diff/diffmatchpatch"
+	log "github.com/sirupsen/logrus"
+)
+
+// Bundle contains various git information for scans. One Bundle describes a piece of
+// content to check against the rules together with where that content came from.
+type Bundle struct {
+	// Commit is the commit that findings in Content are attributed to.
+	Commit    *object.Commit
+	// Patch is the textual patch the content belongs to; it is only filled in for patch scans.
+	Patch     string
+	// Content is the text the rules are matched against.
+	Content   string
+	// FilePath is the path of the file the content belongs to.
+	FilePath  string
+	// Operation is the diff operation (addition, deletion, equal) that produced the content.
+	Operation fdiff.Operation
+
+	// reader is not used by the code in this part of the file. NOTE(review): confirm whether it is still needed.
+	reader     io.Reader
+	// lineLookup records line/line-number/file combinations already reported, so that
+	// repeated identical lines are assigned successive line numbers.
+	lineLookup map[string]bool
+	// scanType is one of patchScan, uncommittedScan or commitScan and selects how line numbers are looked up.
+	scanType   int
+}
+
+// commitScanner is a function signature for scanning commits. Scanning all files at a
+// commit (--files-at-commit=) and scanning the patches generated by a commit (--commit=)
+// share some setup work. The function scanCommit wraps that shared work and accepts a
+// commitScanner for the logic that differs between the two cases.
+type commitScanner func(c *object.Commit, repo *Repo) error
+
+const (
+	// We need to differentiate between scans as the logic for line searching is different between
+	// scanning patches, commits, and uncommitted files. Values start at 1, so the zero value of
+	// Bundle.scanType matches none of them.
+
+	// patchScan marks content taken from a chunk of a patch between two commits.
+	patchScan int = iota + 1
+	// uncommittedScan marks content taken from files in the worktree.
+	uncommittedScan
+	// commitScan marks content taken from a file as it exists at a commit.
+	commitScan
+)
+
+// Scan is responsible for scanning the entire history (default behavior) of a
+// git repo. Options that can change the behavior of this function include: --Commit, --depth, --branch.
+// See options/options.go for an explanation on these options.
+//
+// Commits without parents have all of their files scanned; every other commit is diffed against
+// each of its parents and the resulting patches are scanned concurrently, bounded by the
+// configured number of threads. The history walk is best effort: an error while iterating
+// commits or generating a patch is logged at debug level and does not fail the scan.
+func (repo *Repo) Scan() error {
+	if err := repo.setupTimeout(); err != nil {
+		return err
+	}
+	if repo.cancel != nil {
+		defer repo.cancel()
+	}
+
+	if repo.Repository == nil {
+		return fmt.Errorf("%s repo is empty", repo.Name)
+	}
+
+	// load up alternative config if possible, if not use manager's config
+	if repo.Manager.Opts.RepoConfig {
+		cfg, err := repo.loadRepoConfig()
+		if err != nil {
+			return err
+		}
+		repo.config = cfg
+	}
+
+	scanTimeStart := time.Now()
+
+	// scan Commit patches OR all files at Commit. See https://github.com/zricethezav/gitleaks/issues/326
+	if repo.Manager.Opts.Commit != "" {
+		return scanCommit(repo.Manager.Opts.Commit, repo, scanCommitPatches)
+	} else if repo.Manager.Opts.FilesAtCommit != "" {
+		return scanCommit(repo.Manager.Opts.FilesAtCommit, repo, scanFilesAtCommit)
+	}
+
+	logOpts, err := getLogOptions(repo)
+	if err != nil {
+		return err
+	}
+	cIter, err := repo.Log(logOpts)
+	if err != nil {
+		return err
+	}
+
+	cc := 0
+	// semaphore bounds how many patches are scanned at the same time.
+	semaphore := make(chan bool, howManyThreads(repo.Manager.Opts.Threads))
+	wg := sync.WaitGroup{}
+	err = cIter.ForEach(func(c *object.Commit) error {
+		if c == nil || repo.timeoutReached() || repo.depthReached(cc) {
+			return storer.ErrStop
+		}
+
+		// Check if Commit is whitelisted
+		if isCommitWhiteListed(c.Hash.String(), repo.config.Whitelist.Commits) {
+			return nil
+		}
+
+		// Check if at root
+		if len(c.ParentHashes) == 0 {
+			cc++
+			return scanFilesAtCommit(c, repo)
+		}
+
+		// increase Commit counter
+		cc++
+
+		parentErr := c.Parents().ForEach(func(parent *object.Commit) error {
+			defer func() {
+				if err := recover(); err != nil {
+					// sometimes the Patch generation will fail due to a known bug in
+					// sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
+					// Once a fix has been merged I will remove this recover.
+					return
+				}
+			}()
+			if repo.timeoutReached() {
+				return nil
+			}
+			if parent == nil {
+				// shouldn't reach this point but just in case
+				return nil
+			}
+
+			start := time.Now()
+			patch, err := parent.Patch(c)
+			if err != nil {
+				return fmt.Errorf("could not generate Patch")
+			}
+			repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
+			wg.Add(1)
+			semaphore <- true
+			go func(c *object.Commit, patch *object.Patch) {
+				defer func() {
+					<-semaphore
+					wg.Done()
+				}()
+				scanPatch(patch, c, repo)
+			}(c, patch)
+
+			return nil
+		})
+		if parentErr != nil {
+			// Keep walking the history, but leave a trace of what was skipped.
+			log.Debugf("problem scanning parents of commit %s: %v", c.Hash.String(), parentErr)
+		}
+		if c.Hash.String() == repo.Manager.Opts.CommitTo {
+			return storer.ErrStop
+		}
+		return nil
+	})
+	if err != nil {
+		// The walk is best effort; whatever was scanned so far is still reported.
+		log.Debugf("commit iteration ended with an error for %s: %v", repo.Name, err)
+	}
+
+	wg.Wait()
+	repo.Manager.RecordTime(manager.AuditTime(howLong(scanTimeStart)))
+	repo.Manager.IncrementCommits(cc)
+	return nil
+}
+
+// scanEmpty scans an empty repo without any commits. See https://github.com/zricethezav/gitleaks/issues/352
+// Every file listed in the worktree status that can be opened is read in full and checked
+// against the rules, attributed to a placeholder commit. An error is returned when the
+// worktree or its status cannot be obtained or when reading a file fails.
+func (repo *Repo) scanEmpty() error {
+	scanTimeStart := time.Now()
+	wt, err := repo.Worktree()
+	if err != nil {
+		return err
+	}
+
+	status, err := wt.Status()
+	if err != nil {
+		// Without a status there is nothing to iterate; report it instead of
+		// silently scanning no files.
+		return err
+	}
+	for fn := range status {
+		workTreeFile, err := wt.Filesystem.Open(fn)
+		if err != nil {
+			continue
+		}
+		workTreeBuf := bytes.NewBuffer(nil)
+		_, err = io.Copy(workTreeBuf, workTreeFile)
+		name := workTreeFile.Name()
+		// Close right away rather than deferring so handles do not pile up across the loop.
+		workTreeFile.Close()
+		if err != nil {
+			return err
+		}
+		repo.CheckRules(&Bundle{
+			Content:  workTreeBuf.String(),
+			FilePath: name,
+			Commit:   emptyCommit(),
+			scanType: uncommittedScan,
+		})
+	}
+	repo.Manager.RecordTime(manager.AuditTime(howLong(scanTimeStart)))
+	return nil
+}
+
+// scanUncommitted will do a `git diff` and scan changed files that are being tracked. This is useful functionality
+// for a pre-Commit hook so you can make sure your code does not have any leaks before committing.
+//
+// For every status entry whose staging state is not untracked, the worktree contents are diffed against
+// the contents at HEAD and only the inserted text is checked against the rules. When HEAD cannot be
+// resolved the repo is handed to scanEmpty instead.
+func (repo *Repo) scanUncommitted() error {
+	// load up alternative config if possible, if not use manager's config
+	if repo.Manager.Opts.RepoConfig {
+		cfg, err := repo.loadRepoConfig()
+		if err != nil {
+			return err
+		}
+		repo.config = cfg
+	}
+
+	if err := repo.setupTimeout(); err != nil {
+		return err
+	}
+	if repo.cancel != nil {
+		// Release the timeout context when the scan is over, as Scan does.
+		defer repo.cancel()
+	}
+
+	r, err := repo.Head()
+	if err == plumbing.ErrReferenceNotFound {
+		// possibly an empty repo, or maybe its not, either way lets scan all the files in the directory
+		return repo.scanEmpty()
+	} else if err != nil {
+		return err
+	}
+
+	scanTimeStart := time.Now()
+
+	c, err := repo.CommitObject(r.Hash())
+	if err != nil {
+		return err
+	}
+	// Staged change so the Commit details do not yet exist. Insert empty defaults.
+	c.Hash = plumbing.Hash{}
+	c.Message = "***STAGED CHANGES***"
+	c.Author.Name = ""
+	c.Author.Email = ""
+	c.Author.When = time.Unix(0, 0).UTC()
+
+	prevTree, err := c.Tree()
+	if err != nil {
+		return err
+	}
+	wt, err := repo.Worktree()
+	if err != nil {
+		return err
+	}
+
+	status, err := wt.Status()
+	if err != nil {
+		return err
+	}
+	for fn, state := range status {
+		var (
+			prevFileContents string
+			currFileContents string
+			filename         string
+		)
+
+		if state.Staging == git.Untracked {
+			continue
+		}
+
+		if state.Staging == git.Deleted {
+			// file in staging has been deleted, aka it is not on the filesystem
+			// so the contents of the file are ""
+			currFileContents = ""
+		} else {
+			workTreeFile, err := wt.Filesystem.Open(fn)
+			if err != nil {
+				continue
+			}
+			workTreeBuf := bytes.NewBuffer(nil)
+			_, err = io.Copy(workTreeBuf, workTreeFile)
+			filename = workTreeFile.Name()
+			// Close right away rather than deferring so handles do not pile up across the loop.
+			workTreeFile.Close()
+			if err != nil {
+				return err
+			}
+			currFileContents = workTreeBuf.String()
+		}
+
+		// get files at HEAD state
+		prevFile, err := prevTree.File(fn)
+		if err != nil {
+			// not present at HEAD, so everything in the worktree file counts as new
+			prevFileContents = ""
+		} else {
+			prevFileContents, err = prevFile.Contents()
+			if err != nil {
+				return err
+			}
+			if filename == "" {
+				filename = prevFile.Name
+			}
+		}
+
+		// Only inserted text is scanned; each inserted piece is terminated with a newline.
+		diffs := diffmatchpatch.New().DiffMain(prevFileContents, currFileContents, false)
+		var diffContents bytes.Buffer
+		for _, d := range diffs {
+			if d.Type == diffmatchpatch.DiffInsert {
+				diffContents.WriteString(d.Text)
+				diffContents.WriteByte('\n')
+			}
+		}
+		repo.CheckRules(&Bundle{
+			Content:  diffContents.String(),
+			FilePath: filename,
+			Commit:   c,
+			scanType: uncommittedScan,
+		})
+	}
+
+	repo.Manager.RecordTime(manager.AuditTime(howLong(scanTimeStart)))
+	return nil
+}
+
+// scanPatch checks the chunks of a patch against the configured rules, attributing findings
+// to commit c. Binary file patches are skipped. Added chunks are always checked; deleted
+// chunks are checked only when the deletion option is set. Scanning stops as soon as the
+// repo's timeout has been reached.
+func scanPatch(patch *object.Patch, c *object.Commit, repo *Repo) {
+	bundle := Bundle{
+		Commit:   c,
+		Patch:    patch.String(),
+		scanType: patchScan,
+	}
+	for _, filePatch := range patch.FilePatches() {
+		if repo.timeoutReached() {
+			return
+		}
+		if filePatch.IsBinary() {
+			continue
+		}
+		for _, chunk := range filePatch.Chunks() {
+			op := chunk.Type()
+			wanted := op == fdiff.Add || (repo.Manager.Opts.Deletion && op == fdiff.Delete)
+			if !wanted {
+				continue
+			}
+			bundle.Content = chunk.Content()
+			bundle.Operation = op
+
+			// The "from" side names the file when it exists, otherwise the "to" side does.
+			from, to := filePatch.Files()
+			switch {
+			case from != nil:
+				bundle.FilePath = from.Path()
+			case to != nil:
+				bundle.FilePath = to.Path()
+			default:
+				bundle.FilePath = "???"
+			}
+			repo.CheckRules(&bundle)
+		}
+	}
+}
+
+// scanCommit resolves commit to a commit object and hands it to f, which is either
+// scanCommitPatches or scanFilesAtCommit depending on the options set. The special value
+// "latest" stands for the commit HEAD points to. The manager's commit counter is increased
+// by one before the commit object is looked up.
+func scanCommit(commit string, repo *Repo, f commitScanner) error {
+	if commit == "latest" {
+		head, err := repo.Repository.Head()
+		if err != nil {
+			return err
+		}
+		commit = head.Hash().String()
+	}
+	repo.Manager.IncrementCommits(1)
+	target, err := repo.CommitObject(plumbing.NewHash(commit))
+	if err != nil {
+		return err
+	}
+	return f(target, repo)
+}
+
+// scanCommitPatches scans what a single commit changed; Scan passes it to scanCommit when the
+// --Commit= option has been set. A commit without parents has all of its files scanned, any
+// other commit is diffed against each of its parents and the resulting patches are scanned.
+// As in scanPatch, binary files and whitelisted files are skipped.
+func scanCommitPatches(c *object.Commit, repo *Repo) error {
+	if len(c.ParentHashes) == 0 {
+		if err := scanFilesAtCommit(c, repo); err != nil {
+			return err
+		}
+	}
+
+	return c.Parents().ForEach(func(parent *object.Commit) error {
+		defer func() {
+			if recovered := recover(); recovered != nil {
+				// sometimes the Patch generation will fail due to a known bug in
+				// sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
+				// Once a fix has been merged I will remove this recover.
+				return
+			}
+		}()
+		if repo.timeoutReached() || parent == nil {
+			return nil
+		}
+		began := time.Now()
+		diff, err := parent.Patch(c)
+		if err != nil {
+			return fmt.Errorf("could not generate Patch")
+		}
+		repo.Manager.RecordTime(manager.PatchTime(howLong(began)))
+
+		scanPatch(diff, c, repo)
+
+		return nil
+	})
+}
+
+// scanFilesAtCommit checks the contents of ALL the files that exist at commit c against the
+// rules. It is used for the --files-at-Commit= option and for commits that have no parents.
+// Binary files are skipped, and once the repo's timeout has been reached the remaining files
+// are skipped as well.
+func scanFilesAtCommit(c *object.Commit, repo *Repo) error {
+	files, err := c.Files()
+	if err != nil {
+		return err
+	}
+
+	return files.ForEach(func(f *object.File) error {
+		binary, err := f.IsBinary()
+		switch {
+		case binary || repo.timeoutReached():
+			return nil
+		case err != nil:
+			return err
+		}
+
+		text, err := f.Contents()
+		if err != nil {
+			return err
+		}
+
+		repo.CheckRules(&Bundle{
+			Content:   text,
+			FilePath:  f.Name,
+			Commit:    c,
+			scanType:  commitScan,
+			Operation: fdiff.Add,
+		})
+		return nil
+	})
+}
+
+// depthReached reports whether i equals the depth limit (--depth=). A limit of 0 means no
+// limit is set. A warning is logged when the limit is hit.
+func (repo *Repo) depthReached(i int) bool {
+	limit := repo.Manager.Opts.Depth
+	if limit == 0 || limit != i {
+		return false
+	}
+	log.Warnf("Exceeded depth limit (%d)", i)
+	return true
+}
+
+// emptyCommit builds the placeholder commit used for scanning uncommitted changes: a zero
+// hash, the message "***STAGED CHANGES***", an empty author and the Unix epoch (UTC) as date.
+func emptyCommit() *object.Commit {
+	placeholder := &object.Commit{Message: "***STAGED CHANGES***"}
+	placeholder.Author.When = time.Unix(0, 0).UTC()
+	return placeholder
+}

+ 19 - 19
audit/audit_test.go → scan/scan_test.go

@@ -1,4 +1,4 @@
-package audit
+package scan
 
 import (
 	"encoding/json"
@@ -10,16 +10,16 @@ import (
 	"sort"
 	"testing"
 
-	"github.com/zricethezav/gitleaks/v4/config"
-	"github.com/zricethezav/gitleaks/v4/manager"
-	"github.com/zricethezav/gitleaks/v4/options"
+	"github.com/zricethezav/gitleaks/v5/config"
+	"github.com/zricethezav/gitleaks/v5/manager"
+	"github.com/zricethezav/gitleaks/v5/options"
 
 	"github.com/sergi/go-diff/diffmatchpatch"
 )
 
 const testRepoBase = "../test_data/test_repos/"
 
-func TestAudit(t *testing.T) {
+func TestScan(t *testing.T) {
 	moveDotGit("dotGit", ".git")
 	defer moveDotGit(".git", "dotGit")
 	tests := []struct {
@@ -76,7 +76,7 @@ func TestAudit(t *testing.T) {
 			wantPath: "../test_data/test_local_repo_two_leaks.json",
 		},
 		{
-			description: "test local repo two leaks from commit",
+			description: "test local repo two leaks from Commit",
 			opts: options.Options{
 				RepoPath:     "../test_data/test_repos/test_repo_2",
 				Report:       "../test_data/test_local_repo_two_leaks_commit_from.json.got",
@@ -86,7 +86,7 @@ func TestAudit(t *testing.T) {
 			wantPath: "../test_data/test_local_repo_two_leaks_commit_from.json",
 		},
 		{
-			description: "test local repo two leaks to commit",
+			description: "test local repo two leaks to Commit",
 			opts: options.Options{
 				RepoPath:     "../test_data/test_repos/test_repo_2",
 				Report:       "../test_data/test_local_repo_two_leaks_commit_to.json.got",
@@ -96,7 +96,7 @@ func TestAudit(t *testing.T) {
 			wantPath: "../test_data/test_local_repo_two_leaks_commit_to.json",
 		},
 		{
-			description: "test local repo two leaks range commit",
+			description: "test local repo two leaks range Commit",
 			opts: options.Options{
 				RepoPath:     "../test_data/test_repos/test_repo_2",
 				Report:       "../test_data/test_local_repo_two_leaks_commit_range.json.got",
@@ -145,7 +145,7 @@ func TestAudit(t *testing.T) {
 			wantEmpty: true,
 		},
 		{
-			description: "test local repo one aws leak single commit",
+			description: "test local repo one aws leak single Commit",
 			opts: options.Options{
 				RepoPath:     "../test_data/test_repos/test_repo_1",
 				Report:       "../test_data/test_local_repo_one_aws_leak_commit.json.got",
@@ -244,7 +244,7 @@ func TestAudit(t *testing.T) {
 			wantPath: "../test_data/test_local_owner_aws_leak_depth_2.json",
 		},
 		{
-			description: "test local repo five files at commit",
+			description: "test local repo five files at Commit",
 			opts: options.Options{
 				RepoPath:      "../test_data/test_repos/test_repo_5",
 				Report:        "../test_data/test_local_repo_five_files_at_commit.json.got",
@@ -254,7 +254,7 @@ func TestAudit(t *testing.T) {
 			wantPath: "../test_data/test_local_repo_five_files_at_commit.json",
 		},
 		{
-			description: "test local repo five files at latest commit",
+			description: "test local repo five files at latest Commit",
 			opts: options.Options{
 				RepoPath:      "../test_data/test_repos/test_repo_5",
 				Report:        "../test_data/test_local_repo_five_files_at_latest_commit.json.got",
@@ -264,7 +264,7 @@ func TestAudit(t *testing.T) {
 			wantPath: "../test_data/test_local_repo_five_files_at_commit.json",
 		},
 		{
-			description: "test local repo five at commit",
+			description: "test local repo five at Commit",
 			opts: options.Options{
 				RepoPath:     "../test_data/test_repos/test_repo_5",
 				Report:       "../test_data/test_local_repo_five_commit.json.got",
@@ -275,7 +275,7 @@ func TestAudit(t *testing.T) {
 			wantPath: "../test_data/test_local_repo_five_commit.json",
 		},
 		{
-			description: "test local repo five at latest commit",
+			description: "test local repo five at latest Commit",
 			opts: options.Options{
 				RepoPath:     "../test_data/test_repos/test_repo_5",
 				Report:       "../test_data/test_local_repo_five_at_latest_commit.json.got",
@@ -346,7 +346,7 @@ func TestAudit(t *testing.T) {
 			wantPath: "../test_data/test_local_repo_six_leaks_until_date.json",
 		},
 		{
-			description: "test local repo four leaks timerange commit",
+			description: "test local repo four leaks timerange Commit",
 			opts: options.Options{
 				RepoPath:     "../test_data/test_repos/test_repo_4",
 				Report:       "../test_data/test_local_repo_four_leaks_commit_timerange.json.got",
@@ -357,7 +357,7 @@ func TestAudit(t *testing.T) {
 			wantPath: "../test_data/test_local_repo_four_leaks_commit_timerange.json",
 		},
 		{
-			description: "test local repo two whitelist commit config",
+			description: "test local repo two whitelist Commit config",
 			opts: options.Options{
 				RepoPath:     "../test_data/test_repos/test_repo_2",
 				Report:       "../test_data/test_local_repo_two_whitelist_commits.json.got",
@@ -420,7 +420,7 @@ func TestAudit(t *testing.T) {
 	}
 }
 
-func TestAuditUncommited(t *testing.T) {
+func TestScanUncommited(t *testing.T) {
 	moveDotGit("dotGit", ".git")
 	defer moveDotGit(".git", "dotGit")
 	tests := []struct {
@@ -434,7 +434,7 @@ func TestAuditUncommited(t *testing.T) {
 		addition     string
 	}{
 		{
-			description: "test audit local one leak",
+			description: "test scan local one leak",
 			opts: options.Options{
 				RepoPath:     "../test_data/test_repos/test_repo_1",
 				Report:       "../test_data/test_local_repo_one_aws_leak_uncommitted.json.got",
@@ -446,7 +446,7 @@ func TestAuditUncommited(t *testing.T) {
 			addition:     " aws_access_key_id='AKIAIO5FODNN7DXAMPLE'\n\n",
 		},
 		{
-			description: "test audit local no leak",
+			description: "test scan local no leak",
 			opts: options.Options{
 				RepoPath:     "../test_data/test_repos/test_repo_1",
 				Uncommited:   true,
@@ -457,7 +457,7 @@ func TestAuditUncommited(t *testing.T) {
 			addition:     "nothing bad",
 		},
 		{
-			description: "test audit repo with no commits",
+			description: "test scan repo with no commits",
 			opts: options.Options{
 				RepoPath:     "../test_data/test_repos/test_repo_7",
 				Report:       "../test_data/test_local_repo_seven_aws_leak_uncommitted.json.got",

+ 29 - 11
test_data/test_local_owner_aws_leak.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_1",
@@ -10,10 +11,12 @@
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T09:29:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
@@ -23,10 +26,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
@@ -36,10 +41,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
-  "line": "\nHere's an AWS secret: AKIALALEMEL33243OLIAE",
+  "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_2",
@@ -49,10 +56,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "const AWSKEY = \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 4,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "84ac4e80d4dbf2c968b64e9d4005f5079795bb81",
   "repo": "test_repo_3",
@@ -62,10 +71,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:54:08-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 6,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "deea550dd6c7acaf0e59432600593533984a2125",
   "repo": "test_repo_3",
@@ -75,10 +86,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:35:03-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
-  "line": "\naws_access_key_id='AKIAIO5FODNN7EXAMPL2'",
+  "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPL2'",
+  "lineNumber": 4,
   "offender": "AKIAIO5FODNN7EXAMPL2",
   "commit": "ca71fcdeda15f25f0cc661d90e8785c255925c27",
   "repo": "test_repo_5",
@@ -88,10 +101,12 @@
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:08:04-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPLE'",
+  "lineNumber": 1,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "1f2a4abc47dabf991e6af6f9770867ce0ac1f360",
   "repo": "test_repo_5",
@@ -101,10 +116,12 @@
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:07:34-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "aws_access_key_id=AKIAIO5FODNN7EXAMPLE",
+  "lineNumber": 3,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
   "repo": "test_repo_6",
@@ -114,6 +131,7 @@
   "email": "noealgigu@gmail.com",
   "file": "config/application.properties",
   "date": "2020-02-24T14:13:15-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 16 - 6
test_data/test_local_owner_aws_leak_depth_2.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_1",
@@ -10,10 +11,12 @@
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T09:29:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
@@ -23,10 +26,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "const AWSKEY = \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 4,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "84ac4e80d4dbf2c968b64e9d4005f5079795bb81",
   "repo": "test_repo_3",
@@ -36,10 +41,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:54:08-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
-  "line": "\naws_access_key_id='AKIAIO5FODNN7EXAMPL2'",
+  "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPL2'",
+  "lineNumber": 4,
   "offender": "AKIAIO5FODNN7EXAMPL2",
   "commit": "ca71fcdeda15f25f0cc661d90e8785c255925c27",
   "repo": "test_repo_5",
@@ -49,10 +56,12 @@
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:08:04-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "aws_access_key_id=AKIAIO5FODNN7EXAMPLE",
+  "lineNumber": 3,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
   "repo": "test_repo_6",
@@ -62,6 +71,7 @@
   "email": "noealgigu@gmail.com",
   "file": "config/application.properties",
   "date": "2020-02-24T14:13:15-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 29 - 11
test_data/test_local_owner_aws_leak_whitelist_repo.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
@@ -10,10 +11,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
@@ -23,10 +26,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
-  "line": "\nHere's an AWS secret: AKIALALEMEL33243OLIAE",
+  "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_2",
@@ -36,10 +41,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "const AWSKEY = \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 4,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "84ac4e80d4dbf2c968b64e9d4005f5079795bb81",
   "repo": "test_repo_3",
@@ -49,10 +56,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:54:08-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 6,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "deea550dd6c7acaf0e59432600593533984a2125",
   "repo": "test_repo_3",
@@ -62,10 +71,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:35:03-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
-  "line": "\naws_access_key_id='AKIAIO5FODNN7EXAMPL2'",
+  "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPL2'",
+  "lineNumber": 4,
   "offender": "AKIAIO5FODNN7EXAMPL2",
   "commit": "ca71fcdeda15f25f0cc661d90e8785c255925c27",
   "repo": "test_repo_5",
@@ -75,10 +86,12 @@
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:08:04-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPLE'",
+  "lineNumber": 1,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "1f2a4abc47dabf991e6af6f9770867ce0ac1f360",
   "repo": "test_repo_5",
@@ -88,10 +101,12 @@
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:07:34-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "aws_access_key_id=AKIAIO5FODNN7EXAMPLE",
+  "lineNumber": 3,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
   "repo": "test_repo_6",
@@ -101,10 +116,12 @@
   "email": "noealgigu@gmail.com",
   "file": "config/application.properties",
   "date": "2020-02-24T14:13:15-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_6",
@@ -114,6 +131,7 @@
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T09:29:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 4 - 2
test_data/test_local_repo_five_at_latest_commit.json

@@ -1,6 +1,7 @@
 [
  {
-  "line": "\nmore_secrets = '99432bfewaf823ec3294e231'",
+  "line": "more_secrets = '99432bfewaf823ec3294e231'",
+  "lineNumber": 7,
   "offender": "secrets = '99432bfewaf823ec3294e231",
   "commit": "a4c9fb737d5552fd96fce5cc7eedb23353ba9ed0",
   "repo": "test_repo_5",
@@ -10,6 +11,7 @@
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:30:22-05:00",
-  "tags": "key, API, generic"
+  "tags": "key, API, generic",
+  "operation": "addition"
  }
 ]

+ 4 - 2
test_data/test_local_repo_five_commit.json

@@ -1,6 +1,7 @@
 [
  {
-  "line": "\nmore_secrets = '99432bfewaf823ec3294e231'",
+  "line": "more_secrets = '99432bfewaf823ec3294e231'",
+  "lineNumber": 7,
   "offender": "secrets = '99432bfewaf823ec3294e231",
   "commit": "a4c9fb737d5552fd96fce5cc7eedb23353ba9ed0",
   "repo": "test_repo_5",
@@ -10,6 +11,7 @@
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:30:22-05:00",
-  "tags": "key, API, generic"
+  "tags": "key, API, generic",
+  "operation": "addition"
  }
 ]

+ 6 - 2
test_data/test_local_repo_five_files_at_commit.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPLE'",
+  "lineNumber": 1,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "a4c9fb737d5552fd96fce5cc7eedb23353ba9ed0",
   "repo": "test_repo_5",
@@ -10,10 +11,12 @@
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:30:22-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPL2'",
+  "lineNumber": 4,
   "offender": "AKIAIO5FODNN7EXAMPL2",
   "commit": "a4c9fb737d5552fd96fce5cc7eedb23353ba9ed0",
   "repo": "test_repo_5",
@@ -23,6 +26,7 @@
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:30:22-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 6 - 2
test_data/test_local_repo_five_files_at_latest_commit.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPLE'",
+  "lineNumber": 1,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "a4c9fb737d5552fd96fce5cc7eedb23353ba9ed0",
   "repo": "test_repo_5",
@@ -10,10 +11,12 @@
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:30:22-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "aws_access_key_id='AKIAIO5FODNN7EXAMPL2'",
+  "lineNumber": 4,
   "offender": "AKIAIO5FODNN7EXAMPL2",
   "commit": "a4c9fb737d5552fd96fce5cc7eedb23353ba9ed0",
   "repo": "test_repo_5",
@@ -23,6 +26,7 @@
   "email": "zrice@gitlab.com",
   "file": "secrets.py",
   "date": "2020-02-01T10:30:22-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 6 - 2
test_data/test_local_repo_four_alt_config_entropy.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "const AWSSECRET = \"99432bfewaf823ec3294e231\"",
+  "lineNumber": 5,
   "offender": "\"99432bfewaf823ec3294e231\"",
   "commit": "84ac4e80d4dbf2c968b64e9d4005f5079795bb81",
   "repo": "test_repo_4",
@@ -10,10 +11,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:54:08-04:00",
-  "tags": "entropy"
+  "tags": "entropy",
+  "operation": "addition"
  },
  {
   "line": "    const AWSSECRET = \"99432bfewaf823ec3294e231\"",
+  "lineNumber": 6,
   "offender": "\"99432bfewaf823ec3294e231\"",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_4",
@@ -23,6 +26,7 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "entropy"
+  "tags": "entropy",
+  "operation": "addition"
  }
 ]

+ 6 - 2
test_data/test_local_repo_four_leaks_commit_timerange.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_4",
@@ -10,10 +11,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_4",
@@ -23,6 +26,7 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 3 - 1
test_data/test_local_repo_one_aws_leak.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_1",
@@ -10,6 +11,7 @@
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T09:29:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 9 - 3
test_data/test_local_repo_one_aws_leak_and_file_leak.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "N/A",
+  "lineNumber": -1,
   "offender": "Filename/path offender: server.test.py",
   "commit": "d274003914c707212cbe84e3e466a00013ccb639",
   "repo": "test_repo_1",
@@ -10,10 +11,12 @@
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T10:03:38-04:00",
-  "tags": ""
+  "tags": "",
+  "operation": "addition"
  },
  {
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_1",
@@ -23,10 +26,12 @@
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T09:29:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "N/A",
+  "lineNumber": -1,
   "offender": "Filename/path offender: server.test.py",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_1",
@@ -36,6 +41,7 @@
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T09:29:27-04:00",
-  "tags": ""
+  "tags": "",
+  "operation": "addition"
  }
 ]

+ 3 - 1
test_data/test_local_repo_one_aws_leak_commit.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_1",
@@ -10,6 +11,7 @@
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T09:29:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 3 - 1
test_data/test_local_repo_one_aws_leak_uncommitted.json

@@ -1,6 +1,7 @@
 [
  {
   "line": " aws_access_key_id='AKIAIO5FODNN7DXAMPLE'",
+  "lineNumber": 10,
   "offender": "AKIAIO5FODNN7DXAMPLE",
   "commit": "0000000000000000000000000000000000000000",
   "repo": "test_repo_1",
@@ -10,6 +11,7 @@
   "email": "",
   "file": "server.test.py",
   "date": "1970-01-01T00:00:00Z",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "equal"
  }
 ]

+ 3 - 1
test_data/test_local_repo_seven_aws_leak_uncommitted.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "AKIAIO5FODNN7EXAMPLE",
+  "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "0000000000000000000000000000000000000000",
   "repo": "test_repo_7",
@@ -10,6 +11,7 @@
   "email": "",
   "file": "file",
   "date": "1970-01-01T00:00:00Z",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "equal"
  }
 ]

+ 6 - 2
test_data/test_local_repo_six_filename.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "api_token: exampleSecretPassword",
+  "lineNumber": 2,
   "offender": "token: exampleSecretPassword",
   "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
   "repo": "test_repo_6",
@@ -10,10 +11,12 @@
   "email": "noealgigu@gmail.com",
   "file": "application.yaml",
   "date": "2020-02-24T14:13:15-05:00",
-  "tags": "key, Yml, Yaml"
+  "tags": "key, Yml, Yaml",
+  "operation": "addition"
  },
  {
   "line": "db_password: verySecretProductionPassword",
+  "lineNumber": 3,
   "offender": "password: verySecretProductionPassword",
   "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
   "repo": "test_repo_6",
@@ -23,6 +26,7 @@
   "email": "noealgigu@gmail.com",
   "file": "application.yaml",
   "date": "2020-02-24T14:13:15-05:00",
-  "tags": "key, Yml, Yaml"
+  "tags": "key, Yml, Yaml",
+  "operation": "addition"
  }
 ]

+ 3 - 1
test_data/test_local_repo_six_filepath.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "aws_access_key_id=AKIAIO5FODNN7EXAMPLE",
+  "lineNumber": 3,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
   "repo": "test_repo_6",
@@ -10,6 +11,7 @@
   "email": "noealgigu@gmail.com",
   "file": "config/application.properties",
   "date": "2020-02-24T14:13:15-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 3 - 1
test_data/test_local_repo_six_filepath_filename.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "apiToken=d41d8cd98f00b204e9800998ecf8427e",
+  "lineNumber": 2,
   "offender": "Token=d41d8cd98f00b204e9800998ecf8427e",
   "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
   "repo": "test_repo_6",
@@ -10,6 +11,7 @@
   "email": "noealgigu@gmail.com",
   "file": "config/application.properties",
   "date": "2020-02-24T14:13:15-05:00",
-  "tags": "key, Yml, Yaml"
+  "tags": "key, Yml, Yaml",
+  "operation": "addition"
  }
 ]

+ 3 - 1
test_data/test_local_repo_six_leaks_since_date.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "aws_access_key_id=AKIAIO5FODNN7EXAMPLE",
+  "lineNumber": 3,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "98b6c7cb3fb29a5993c4c95c56a2dc53050b9247",
   "repo": "test_repo_6",
@@ -10,6 +11,7 @@
   "email": "noealgigu@gmail.com",
   "file": "config/application.properties",
   "date": "2020-02-24T14:13:15-05:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 3 - 1
test_data/test_local_repo_six_leaks_until_date.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_6",
@@ -10,6 +11,7 @@
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T09:29:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 3 - 1
test_data/test_local_repo_six_path_globally_whitelisted.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "    aws_access_key_id='AKIAIO5FODNN7EXAMPLE',",
+  "lineNumber": 5,
   "offender": "AKIAIO5FODNN7EXAMPLE",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_6",
@@ -10,6 +11,7 @@
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T09:29:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 13 - 5
test_data/test_local_repo_three_leaks.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 6,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "deea550dd6c7acaf0e59432600593533984a2125",
   "repo": "test_repo_3",
@@ -10,10 +11,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:35:03-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_3",
@@ -23,10 +26,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_3",
@@ -36,10 +41,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
-  "line": "\nHere's an AWS secret: AKIALALEMEL33243OLIAE",
+  "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_3",
@@ -49,6 +56,7 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 10 - 4
test_data/test_local_repo_two_leaks.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
@@ -10,10 +11,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
@@ -23,10 +26,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
-  "line": "\nHere's an AWS secret: AKIALALEMEL33243OLIAE",
+  "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_2",
@@ -36,6 +41,7 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 7 - 3
test_data/test_local_repo_two_leaks_commit_from.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
@@ -10,10 +11,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
-  "line": "\nHere's an AWS secret: AKIALALEMEL33243OLIAE",
+  "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_2",
@@ -23,6 +26,7 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 3 - 1
test_data/test_local_repo_two_leaks_commit_range.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
@@ -10,6 +11,7 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 3 - 1
test_data/test_local_repo_two_leaks_commit_to.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
@@ -10,6 +11,7 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 19 - 7
test_data/test_local_repo_two_leaks_deletion.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": -1,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "f61cd8587b7ac1d75a89a0c9af870a2f24c60263",
   "repo": "test_repo_2",
@@ -10,10 +11,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:32-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "deletion"
  },
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
@@ -23,10 +26,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": -1,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "996865bb912f3bc45898a370a13aadb315014b55",
   "repo": "test_repo_2",
@@ -36,10 +41,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:07:41-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "deletion"
  },
  {
   "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
+  "lineNumber": -1,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
@@ -49,10 +56,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "deletion"
  },
  {
   "line": "Here's an AWS secret: \"AKIALALEMEL33243OLIAE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "17471a5fda722a9e423f1a0d3f0d267ea009d41c",
   "repo": "test_repo_2",
@@ -62,10 +71,12 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:01:27-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  },
  {
-  "line": "\nHere's an AWS secret: AKIALALEMEL33243OLIAE",
+  "line": "Here's an AWS secret: AKIALALEMEL33243OLIAE",
+  "lineNumber": 3,
   "offender": "AKIALALEMEL33243OLIA",
   "commit": "b10b3e2cb320a8c211fda94c4567299d37de7776",
   "repo": "test_repo_2",
@@ -75,6 +86,7 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T12:58:39-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 3 - 1
test_data/test_local_repo_two_whitelist_commits.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "    const AWSKEY = \"AKIALALEMEL33243OLIBE\"",
+  "lineNumber": 5,
   "offender": "AKIALALEMEL33243OLIB",
   "commit": "b2eb34a61c988afd9b4aaa9dd58c8dd7d5f14dba",
   "repo": "test_repo_2",
@@ -10,6 +11,7 @@
   "email": "zricer@protonmail.com",
   "file": "secrets.md",
   "date": "2019-10-25T13:12:08-04:00",
-  "tags": "key, AWS"
+  "tags": "key, AWS",
+  "operation": "addition"
  }
 ]

+ 3 - 1
test_data/test_regex_entropy.json

@@ -1,6 +1,7 @@
 [
  {
   "line": "    aws_secret_access_key='ABCDEF+c2L7yXeGvUyrPgYsDnWRRC1AYEXAMPLE'",
+  "lineNumber": 6,
   "offender": "'ABCDEF+c2L7yXeGvUyrPgYsDnWRRC1AYEXAMPLE'",
   "commit": "6557c92612d3b35979bd426d429255b3bf9fab74",
   "repo": "test_repo_1",
@@ -10,6 +11,7 @@
   "email": "zricer@protonmail.com",
   "file": "server.test.py",
   "date": "2019-10-24T09:29:27-04:00",
-  "tags": "entropy"
+  "tags": "entropy",
+  "operation": "addition"
  }
 ]