Преглед изворни кода

Max go (#98)

* default max go, cleaning up for orgs

* bump changelog

* bumping version
Zachary Rice пре 7 година
родитељ
комит
3faec00bf9
4 измењених фајлова са 214 додато и 25 уклоњено
  1. 6 0
      CHANGELOG.md
  2. 6 2
      Makefile
  3. 186 0
      gitleaks_test.go
  4. 16 23
      main.go

+ 6 - 0
CHANGELOG.md

@@ -1,6 +1,12 @@
 CHANGELOG
 =========
 
+1.6.0
+-----
+- Default maximum number of goroutines spawned is the number of cores your CPU runs with. See benchmarks in the wiki.
+- Clean up after each repo audit for organizations rather than waiting for the entire organization audit to complete. Eliminates the risk of running out of disk space.
+
+
 1.5.0
 -----
 - Support for CSV reporting

+ 6 - 2
Makefile

@@ -4,13 +4,12 @@ test:
 	go get github.com/golang/lint/golint
 	go fmt
 	golint
-	go test --race -cover
+	go test --race --cover -run=Test$
 deploy:
 	@echo "$(DOCKER_PASSWORD)" | docker login -u "$(DOCKER_USERNAME)" --password-stdin
 	docker build -f Dockerfile -t $(REPO):$(TAG) .
 	echo "Pushing $(REPO):$(COMMIT) $(REPO):$(TAG)"
 	docker push $(REPO)
-
 build-all:
 	rm -rf build
 	mkdir build
@@ -21,3 +20,8 @@ build-all:
 	env GOOS="linux" GOARCH="mips" go build -o "build/gitleaks-linux-mips"
 	env GOOS="linux" GOARCH="mips" go build -o "build/gitleaks-linux-mips"
 	env GOOS="darwin" GOARCH="amd64" go build -o "build/gitleaks-darwin-amd64"
+benchmark:
+	go test -run=Benchmark -bench=. -benchtime=5s
+benchmark-fast:
+	go test -bench=BenchmarkAuditLeakRepo -run=BenchmarkAuditLeakRepo$
+

+ 186 - 0
gitleaks_test.go

@@ -56,6 +56,35 @@ regexes= [
 ]
 `
 
+var benchmarkRepo *Repo
+var benchmarkLeaksRepo *Repo
+
+func getBenchmarkLeaksRepo() *Repo {
+	if benchmarkLeaksRepo != nil {
+		return benchmarkLeaksRepo
+	}
+	leaksR, _ := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
+		URL: "https://github.com/gitleakstest/gronit.git",
+	})
+	benchmarkLeaksRepo = &Repo{
+		repository: leaksR,
+	}
+	return benchmarkLeaksRepo
+}
+
+func getBenchmarkRepo() *Repo {
+	if benchmarkRepo != nil {
+		return benchmarkRepo
+	}
+	bmRepo, _ := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
+		URL: "https://github.com/apple/swift-package-manager.git",
+	})
+	benchmarkRepo = &Repo{
+		repository: bmRepo,
+	}
+	return benchmarkRepo
+}
+
 func TestGetRepo(t *testing.T) {
 	var err error
 	dir, err = ioutil.TempDir("", "gitleaksTestRepo")
@@ -690,3 +719,160 @@ func TestLoadToml(t *testing.T) {
 		})
 	}
 }
+
+func BenchmarkAuditRepo1Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 1
+	benchmarkRepo = getBenchmarkRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkRepo)
+	}
+}
+
+func BenchmarkAuditRepo2Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 2
+	benchmarkRepo = getBenchmarkRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkRepo)
+	}
+}
+
+func BenchmarkAuditRepo4Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 4
+	benchmarkRepo = getBenchmarkRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkRepo)
+	}
+}
+
+func BenchmarkAuditRepo8Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 8
+	benchmarkRepo = getBenchmarkRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkRepo)
+	}
+}
+
+func BenchmarkAuditRepo10Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 10
+	benchmarkRepo = getBenchmarkRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkRepo)
+	}
+}
+
+func BenchmarkAuditRepo100Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 100
+	benchmarkRepo = getBenchmarkRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkRepo)
+	}
+}
+
+func BenchmarkAuditRepo1000Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 1000
+	benchmarkRepo = getBenchmarkRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkRepo)
+	}
+}
+func BenchmarkAuditRepo10000Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 10000
+	benchmarkRepo = getBenchmarkRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkRepo)
+	}
+}
+func BenchmarkAuditRepo100000Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 100000
+	benchmarkRepo = getBenchmarkRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkRepo)
+	}
+}
+func BenchmarkAuditLeakRepo1Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 1
+	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkLeaksRepo)
+	}
+}
+
+func BenchmarkAuditLeakRepo2Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 2
+	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkLeaksRepo)
+	}
+}
+
+func BenchmarkAuditLeakRepo4Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 4
+	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkLeaksRepo)
+	}
+}
+
+func BenchmarkAuditLeakRepo8Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 8
+	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkLeaksRepo)
+	}
+}
+
+func BenchmarkAuditLeakRepo10Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 10
+	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkLeaksRepo)
+	}
+}
+func BenchmarkAuditLeakRepo100Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 100
+	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkLeaksRepo)
+	}
+}
+func BenchmarkAuditLeakRepo1000Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 1000
+	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkLeaksRepo)
+	}
+}
+
+func BenchmarkAuditLeakRepo10000Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 10000
+	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkLeaksRepo)
+	}
+}
+
+func BenchmarkAuditLeakRepo100000Proc(b *testing.B) {
+	loadToml()
+	opts.MaxGoRoutines = 100000
+	benchmarkLeaksRepo = getBenchmarkLeaksRepo()
+	for n := 0; n < b.N; n++ {
+		auditRepo(*benchmarkLeaksRepo)
+	}
+}

+ 16 - 23
main.go

@@ -15,6 +15,7 @@ import (
 	"path"
 	"path/filepath"
 	"regexp"
+	"runtime"
 	"strings"
 	"sync"
 	"time"
@@ -118,7 +119,7 @@ type Config struct {
 }
 
 const defaultGithubURL = "https://api.github.com/"
-const version = "1.5.0"
+const version = "1.6.0"
 const defaultConfig = `
 title = "gitleaks config"
 # add regexes to the regex table
@@ -175,10 +176,12 @@ var (
 	fileDiffRegex     *regexp.Regexp
 	sshAuth           *ssh.PublicKeys
 	dir               string
+	maxGo             int
 )
 
 func init() {
 	log.SetOutput(os.Stdout)
+	maxGo = runtime.GOMAXPROCS(0) / 2
 	regexes = make(map[string]*regexp.Regexp)
 	whiteListCommits = make(map[string]bool)
 }
@@ -359,20 +362,16 @@ func getRepo() (Repo, error) {
 // double dip
 func auditRef(repo Repo, ref *plumbing.Reference, commitWg *sync.WaitGroup, commitChan chan []Leak) error {
 	var (
-		err             error
-		prevCommit      *object.Commit
-		limitGoRoutines bool
-		semaphore       chan bool
-		repoName        string
+		err        error
+		prevCommit *object.Commit
+		semaphore  chan bool
+		repoName   string
 	)
-
 	repoName = repo.name
-
-	// goroutine limiting
 	if opts.MaxGoRoutines != 0 {
-		semaphore = make(chan bool, opts.MaxGoRoutines)
-		limitGoRoutines = true
+		maxGo = opts.MaxGoRoutines
 	}
+	semaphore = make(chan bool, maxGo)
 	cIter, err := repo.repository.Log(&git.LogOptions{From: ref.Hash()})
 	if err != nil {
 		return err
@@ -385,9 +384,8 @@ func auditRef(repo Repo, ref *plumbing.Reference, commitWg *sync.WaitGroup, comm
 			log.Infof("skipping commit: %s\n", c.Hash.String())
 			return nil
 		}
-		if limitGoRoutines {
-			semaphore <- true
-		}
+
+		semaphore <- true
 		commitWg.Add(1)
 		go func(c *object.Commit, prevCommit *object.Commit) {
 			var (
@@ -408,9 +406,7 @@ func auditRef(repo Repo, ref *plumbing.Reference, commitWg *sync.WaitGroup, comm
 				})
 				if err != nil {
 					log.Warnf("problem generating diff for commit: %s\n", c.Hash.String())
-					if limitGoRoutines {
-						<-semaphore
-					}
+					<-semaphore
 					commitChan <- leaks
 					return
 				}
@@ -418,9 +414,7 @@ func auditRef(repo Repo, ref *plumbing.Reference, commitWg *sync.WaitGroup, comm
 				patch, err := c.Patch(prevCommit)
 				if err != nil {
 					log.Warnf("problem generating patch for commit: %s\n", c.Hash.String())
-					if limitGoRoutines {
-						<-semaphore
-					}
+					<-semaphore
 					commitChan <- leaks
 					return
 				}
@@ -451,9 +445,7 @@ func auditRef(repo Repo, ref *plumbing.Reference, commitWg *sync.WaitGroup, comm
 					}
 				}
 			}
-			if limitGoRoutines {
-				<-semaphore
-			}
+			<-semaphore
 			commitChan <- leaks
 		}(c, prevCommit)
 		prevCommit = c
@@ -666,6 +658,7 @@ func auditGithubRepo(githubRepo *github.Repository) ([]Leak, error) {
 	log.Infof("cloning: %s", *githubRepo.Name)
 	if opts.Disk {
 		ownerDir, err := ioutil.TempDir(dir, opts.GithubUser)
+		defer os.RemoveAll(fmt.Sprintf("%s/%s", ownerDir, *githubRepo.Name))
 		if err != nil {
 			return leaks, fmt.Errorf("unable to generater owner temp dir: %v", err)
 		}