Browse Source

let the repo handle timeouts... better for automating audits anyway

zricethezav 6 years ago
parent
commit
62e74b2b18
3 changed files with 56 additions and 3 deletions
  1. 42 1
      audit/repo.go
  2. 13 1
      audit/util.go
  3. 1 1
      options/options.go

+ 42 - 1
audit/repo.go

@@ -2,6 +2,7 @@ package audit
 
 import (
 	"bytes"
+	"context"
 	"crypto/md5"
 	"fmt"
 	"io"
@@ -39,6 +40,8 @@ type Repo struct {
 	// for those repo audits.
 	config config.Config
 
+	ctx context.Context
+
 	Name    string
 	Manager *manager.Manager
 }
@@ -200,10 +203,45 @@ func (repo *Repo) AuditUncommitted() error {
 	return nil
 }
 
+// timeoutReached reports whether the audit's timeout deadline has passed. It is
+// meant to be checked at the top of loops and before potentially long-running
+// work (like checking inefficient regexes) so the audit can stop early.
+//
+// repo.ctx is only assigned by setupTimeout when a --timeout value was given,
+// so a nil context means no deadline is in effect; false is returned in that
+// case instead of panicking on a nil interface.
+func (repo *Repo) timeoutReached() bool {
+	if repo.ctx == nil {
+		return false
+	}
+	return repo.ctx.Err() == context.DeadlineExceeded
+}
+
+// setupTimeout parses the --timeout option and, when one is set, stores a
+// context carrying that deadline on the repo so that timeoutReached can make
+// the audit exit early. An empty --timeout leaves repo.ctx untouched.
+//
+// It returns the error from time.ParseDuration if the option is not a valid
+// duration string (for example "30s" or "1h10m1s").
+func (repo *Repo) setupTimeout() error {
+	if repo.Manager.Opts.Timeout == "" {
+		return nil
+	}
+	timeout, err := time.ParseDuration(repo.Manager.Opts.Timeout)
+	if err != nil {
+		return err
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	repo.ctx = ctx
+
+	// Watch the local ctx rather than re-reading the repo.ctx field. Nothing
+	// cancels the context early, so Done only fires when the deadline expires;
+	// this goroutine therefore lives until then.
+	go func() {
+		// Release the context's resources once the deadline has fired.
+		defer cancel()
+		<-ctx.Done()
+		log.Warnf("Timeout deadline exceeded: %s", timeout.String())
+	}()
+	return nil
+}
+
 // Audit is responsible for scanning the entire history (default behavior) of a
 // git repo. Options that can change the behavior of this function include: --commit, --depth, --branch.
 // See options/options.go for an explanation on these options.
 func (repo *Repo) Audit() error {
+	if err := repo.setupTimeout(); err != nil {
+		return err
+	}
+
 	if repo.Repository == nil {
 		return fmt.Errorf("%s repo is empty", repo.Name)
 	}
@@ -247,7 +285,7 @@ func (repo *Repo) Audit() error {
 	semaphore := make(chan bool, howManyThreads(repo.Manager.Opts.Threads))
 	wg := sync.WaitGroup{}
 	err = cIter.ForEach(func(c *object.Commit) error {
-		if c == nil || c.Hash.String() == repo.Manager.Opts.CommitTo {
+		if c == nil || c.Hash.String() == repo.Manager.Opts.CommitTo || repo.timeoutReached() {
 			return storer.ErrStop
 		}
 
@@ -274,6 +312,9 @@ func (repo *Repo) Audit() error {
 					return
 				}
 			}()
+			if repo.timeoutReached() {
+				return nil
+			}
 			start := time.Now()
 			patch, err := c.Patch(parent)
 			if err != nil {

+ 13 - 1
audit/util.go

@@ -25,6 +25,9 @@ import (
 // After that, file chunks are created which are then inspected by InspectString()
 func inspectPatch(patch *object.Patch, c *object.Commit, repo *Repo) {
 	for _, f := range patch.FilePatches() {
+		if repo.timeoutReached() {
+			return
+		}
 		if f.IsBinary() {
 			continue
 		}
@@ -119,6 +122,9 @@ func InspectString(content string, c *object.Commit, repo *Repo, filename string
 			// an optimization would be to switch the regex from FindAllIndex to FindString
 			// since we are iterating on the lines if entropy rules exist...
 			for _, line := range strings.Split(content, "\n") {
+				if repo.timeoutReached() {
+					return
+				}
 				entropyTripped := trippedEntropy(line, rule)
 				if entropyTripped && !ruleContainRegex(rule) {
 					repo.Manager.SendLeaks(manager.Leak{
@@ -135,6 +141,9 @@ func InspectString(content string, c *object.Commit, repo *Repo, filename string
 						File:     filename,
 					})
 				} else if entropyTripped {
+					if repo.timeoutReached() {
+						return
+					}
 					// entropy has been tripped which means if there is a regex specified in the same
 					// rule, we need to inspect the line for a regex match. In other words, the current rule has
 					// both entropy and regex set which work in combination. This helps narrow down false positives
@@ -179,6 +188,9 @@ func InspectString(content string, c *object.Commit, repo *Repo, filename string
 		if rule.Regex.String() == "" {
 			continue
 		}
+		if repo.timeoutReached() {
+			return
+		}
 		start := time.Now()
 		locs := rule.Regex.FindAllIndex([]byte(content), -1)
 		if len(locs) != 0 {
@@ -255,7 +267,7 @@ func inspectCommit(c *object.Commit, repo *Repo) error {
 
 	err = fIter.ForEach(func(f *object.File) error {
 		bin, err := f.IsBinary()
-		if bin {
+		if bin || repo.timeoutReached() {
 			return nil
 		} else if err != nil {
 			return err

+ 1 - 1
options/options.go

@@ -32,7 +32,6 @@ type Options struct {
 	Config       string `long:"config" description:"config path"`
 	Disk         bool   `long:"disk" description:"Clones repo(s) to disk"`
 	Version      bool   `long:"version" description:"version number"`
-	Timeout      int    `long:"timeout" description:"Timeout (s)"`
 	Username     string `long:"username" description:"Username for git repo"`
 	Password     string `long:"password" description:"Password for git repo"`
 	AccessToken  string `long:"access-token" description:"Access token for git repo"`
@@ -51,6 +50,7 @@ type Options struct {
 	PrettyPrint  bool   `long:"pretty" description:"Pretty print json if leaks are present"`
 	CommitFrom   string `long:"commit-from" description:"Commit to start audit from"`
 	CommitTo     string `long:"commit-to" description:"Commit to stop audit"`
+	Timeout      string `long:"timeout" description:"Time allowed per audit. Ex: 10us, 30s, 1m, 1h10m1s"`
 
 	// Hosts
 	Host         string `long:"host" description:"git hosting service like gitlab or github. Supported hosts include: Github, Gitlab"`