Browse Source

Merge pull request #319 from zricethezav/repo-timeout

Repo timeout
Zachary Rice 6 years ago
parent
commit
e6fabdbb31
4 changed files with 83 additions and 3 deletions
  1. 20 0
      audit/audit_test.go
  2. 49 1
      audit/repo.go
  3. 13 1
      audit/util.go
  4. 1 1
      options/options.go

+ 20 - 0
audit/audit_test.go

@@ -213,6 +213,26 @@ func TestAudit(t *testing.T) {
 			},
 			},
 			wantEmpty: true,
 			wantEmpty: true,
 		},
 		},
+		{
+			description: "test local repo one aws leak timeout",
+			opts: options.Options{
+				RepoPath:     "../test_data/test_repos/test_repo_1",
+				Report:       "../test_data/test_local_repo_one_aws_leak.json.got",
+				ReportFormat: "json",
+				Timeout:      "10ns",
+			},
+			wantEmpty: true,
+		},
+		{
+			description: "test local repo one aws leak long timeout",
+			opts: options.Options{
+				RepoPath:     "../test_data/test_repos/test_repo_1",
+				Report:       "../test_data/test_local_repo_one_aws_leak.json.got",
+				ReportFormat: "json",
+				Timeout:      "2m",
+			},
+			wantPath: "../test_data/test_local_repo_one_aws_leak.json",
+		},
 	}
 	}
 
 
 	for _, test := range tests {
 	for _, test := range tests {

+ 49 - 1
audit/repo.go

@@ -2,6 +2,7 @@ package audit
 
 
 import (
 import (
 	"bytes"
 	"bytes"
+	"context"
 	"crypto/md5"
 	"crypto/md5"
 	"fmt"
 	"fmt"
 	"io"
 	"io"
@@ -39,6 +40,10 @@ type Repo struct {
 	// for those repo audits.
 	// for those repo audits.
 	config config.Config
 	config config.Config
 
 
+	// ctx is used to signal timeouts to running goroutines
+	ctx    context.Context
+	cancel context.CancelFunc
+
 	Name    string
 	Name    string
 	Manager *manager.Manager
 	Manager *manager.Manager
 }
 }
@@ -48,6 +53,7 @@ func NewRepo(m *manager.Manager) *Repo {
 	return &Repo{
 	return &Repo{
 		Manager: m,
 		Manager: m,
 		config:  m.Config,
 		config:  m.Config,
+		ctx:     context.Background(),
 	}
 	}
 }
 }
 
 
@@ -93,6 +99,10 @@ func (repo *Repo) AuditUncommitted() error {
 		repo.config = cfg
 		repo.config = cfg
 	}
 	}
 
 
+	if err := repo.setupTimeout(); err != nil {
+		return err
+	}
+
 	auditTimeStart := time.Now()
 	auditTimeStart := time.Now()
 
 
 	r, err := repo.Head()
 	r, err := repo.Head()
@@ -204,6 +214,10 @@ func (repo *Repo) AuditUncommitted() error {
 // git repo. Options that can change the behavior of this function include: --commit, --depth, --branch.
 // git repo. Options that can change the behavior of this function include: --commit, --depth, --branch.
 // See options/options.go for an explanation on these options.
 // See options/options.go for an explanation on these options.
 func (repo *Repo) Audit() error {
 func (repo *Repo) Audit() error {
+	if err := repo.setupTimeout(); err != nil {
+		return err
+	}
+
 	if repo.Repository == nil {
 	if repo.Repository == nil {
 		return fmt.Errorf("%s repo is empty", repo.Name)
 		return fmt.Errorf("%s repo is empty", repo.Name)
 	}
 	}
@@ -247,7 +261,7 @@ func (repo *Repo) Audit() error {
 	semaphore := make(chan bool, howManyThreads(repo.Manager.Opts.Threads))
 	semaphore := make(chan bool, howManyThreads(repo.Manager.Opts.Threads))
 	wg := sync.WaitGroup{}
 	wg := sync.WaitGroup{}
 	err = cIter.ForEach(func(c *object.Commit) error {
 	err = cIter.ForEach(func(c *object.Commit) error {
-		if c == nil || c.Hash.String() == repo.Manager.Opts.CommitTo {
+		if c == nil || c.Hash.String() == repo.Manager.Opts.CommitTo || repo.timeoutReached() {
 			return storer.ErrStop
 			return storer.ErrStop
 		}
 		}
 
 
@@ -274,6 +288,9 @@ func (repo *Repo) Audit() error {
 					return
 					return
 				}
 				}
 			}()
 			}()
+			if repo.timeoutReached() {
+				return nil
+			}
 			start := time.Now()
 			start := time.Now()
 			patch, err := c.Patch(parent)
 			patch, err := c.Patch(parent)
 			if err != nil {
 			if err != nil {
@@ -344,3 +361,34 @@ func (repo *Repo) loadRepoConfig() (config.Config, error) {
 	_, err = toml.DecodeReader(f, &tomlLoader)
 	_, err = toml.DecodeReader(f, &tomlLoader)
 	return tomlLoader.Parse()
 	return tomlLoader.Parse()
 }
 }
+
+// timeoutReached reports whether the repo's context has ended because its deadline
+// passed. Call it at the top of loops and before potentially long running work (such
+// as evaluating inefficient regexes) so an audit can bail out early. Any other context
+// state, including one with no error at all, yields false.
+func (repo *Repo) timeoutReached() bool {
+	return repo.ctx.Err() == context.DeadlineExceeded
+}
+
+// setupTimeout parses the --timeout option and, when one is set, replaces the repo's
+// context with one that expires after that duration.
+//
+// An empty option leaves the current context untouched and returns nil. A value that
+// time.ParseDuration rejects is returned as an error and nothing is changed.
+func (repo *Repo) setupTimeout() error {
+	if repo.Manager.Opts.Timeout == "" {
+		return nil
+	}
+	timeout, err := time.ParseDuration(repo.Manager.Opts.Timeout)
+	if err != nil {
+		return err
+	}
+
+	// Release the timer of any context installed by an earlier call before replacing it;
+	// otherwise it lingers (and its watcher goroutine with it) until the old deadline.
+	if repo.cancel != nil {
+		repo.cancel()
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	repo.ctx, repo.cancel = ctx, cancel
+
+	// Watch a local copy of the context so a later setupTimeout call cannot swap the
+	// field out from under this goroutine. The goroutine ends as soon as the context is
+	// done, and only warns when the deadline actually elapsed (not on a plain cancel).
+	go func() {
+		<-ctx.Done()
+		if ctx.Err() == context.DeadlineExceeded {
+			log.Warnf("Timeout deadline exceeded: %s", timeout.String())
+		}
+	}()
+	return nil
+}

+ 13 - 1
audit/util.go

@@ -25,6 +25,9 @@ import (
 // After that, file chunks are created which are then inspected by InspectString()
 // After that, file chunks are created which are then inspected by InspectString()
 func inspectPatch(patch *object.Patch, c *object.Commit, repo *Repo) {
 func inspectPatch(patch *object.Patch, c *object.Commit, repo *Repo) {
 	for _, f := range patch.FilePatches() {
 	for _, f := range patch.FilePatches() {
+		if repo.timeoutReached() {
+			return
+		}
 		if f.IsBinary() {
 		if f.IsBinary() {
 			continue
 			continue
 		}
 		}
@@ -119,6 +122,9 @@ func InspectString(content string, c *object.Commit, repo *Repo, filename string
 			// an optimization would be to switch the regex from FindAllIndex to FindString
 			// an optimization would be to switch the regex from FindAllIndex to FindString
 			// since we are iterating on the lines if entropy rules exist...
 			// since we are iterating on the lines if entropy rules exist...
 			for _, line := range strings.Split(content, "\n") {
 			for _, line := range strings.Split(content, "\n") {
+				if repo.timeoutReached() {
+					return
+				}
 				entropyTripped := trippedEntropy(line, rule)
 				entropyTripped := trippedEntropy(line, rule)
 				if entropyTripped && !ruleContainRegex(rule) {
 				if entropyTripped && !ruleContainRegex(rule) {
 					repo.Manager.SendLeaks(manager.Leak{
 					repo.Manager.SendLeaks(manager.Leak{
@@ -135,6 +141,9 @@ func InspectString(content string, c *object.Commit, repo *Repo, filename string
 						File:     filename,
 						File:     filename,
 					})
 					})
 				} else if entropyTripped {
 				} else if entropyTripped {
+					if repo.timeoutReached() {
+						return
+					}
 					// entropy has been tripped which means if there is a regex specified in the same
 					// entropy has been tripped which means if there is a regex specified in the same
 					// rule, we need to inspect the line for a regex match. In otherwords, the current rule has
 					// rule, we need to inspect the line for a regex match. In otherwords, the current rule has
 					// both entropy and regex set which work in combination. This helps narrow down false positives
 					// both entropy and regex set which work in combination. This helps narrow down false positives
@@ -179,6 +188,9 @@ func InspectString(content string, c *object.Commit, repo *Repo, filename string
 		if rule.Regex.String() == "" {
 		if rule.Regex.String() == "" {
 			continue
 			continue
 		}
 		}
+		if repo.timeoutReached() {
+			return
+		}
 		start := time.Now()
 		start := time.Now()
 		locs := rule.Regex.FindAllIndex([]byte(content), -1)
 		locs := rule.Regex.FindAllIndex([]byte(content), -1)
 		if len(locs) != 0 {
 		if len(locs) != 0 {
@@ -255,7 +267,7 @@ func inspectCommit(c *object.Commit, repo *Repo) error {
 
 
 	err = fIter.ForEach(func(f *object.File) error {
 	err = fIter.ForEach(func(f *object.File) error {
 		bin, err := f.IsBinary()
 		bin, err := f.IsBinary()
-		if bin {
+		if bin || repo.timeoutReached() {
 			return nil
 			return nil
 		} else if err != nil {
 		} else if err != nil {
 			return err
 			return err

+ 1 - 1
options/options.go

@@ -32,7 +32,6 @@ type Options struct {
 	Config       string `long:"config" description:"config path"`
 	Config       string `long:"config" description:"config path"`
 	Disk         bool   `long:"disk" description:"Clones repo(s) to disk"`
 	Disk         bool   `long:"disk" description:"Clones repo(s) to disk"`
 	Version      bool   `long:"version" description:"version number"`
 	Version      bool   `long:"version" description:"version number"`
-	Timeout      int    `long:"timeout" description:"Timeout (s)"`
 	Username     string `long:"username" description:"Username for git repo"`
 	Username     string `long:"username" description:"Username for git repo"`
 	Password     string `long:"password" description:"Password for git repo"`
 	Password     string `long:"password" description:"Password for git repo"`
 	AccessToken  string `long:"access-token" description:"Access token for git repo"`
 	AccessToken  string `long:"access-token" description:"Access token for git repo"`
@@ -51,6 +50,7 @@ type Options struct {
 	PrettyPrint  bool   `long:"pretty" description:"Pretty print json if leaks are present"`
 	PrettyPrint  bool   `long:"pretty" description:"Pretty print json if leaks are present"`
 	CommitFrom   string `long:"commit-from" description:"Commit to start audit from"`
 	CommitFrom   string `long:"commit-from" description:"Commit to start audit from"`
 	CommitTo     string `long:"commit-to" description:"Commit to stop audit"`
 	CommitTo     string `long:"commit-to" description:"Commit to stop audit"`
+	Timeout      string `long:"timeout" description:"Time allowed per audit. Ex: 10us, 30s, 1m, 1h10m1s"`
 
 
 	// Hosts
 	// Hosts
 	Host         string `long:"host" description:"git hosting service like gitlab or github. Supported hosts include: Github, Gitlab"`
 	Host         string `long:"host" description:"git hosting service like gitlab or github. Supported hosts include: Github, Gitlab"`