Просмотр исходного кода

Merge pull request #126 from zricethezav/fix/concurrency

Fix/concurrency
Zachary Rice 7 лет назад
Родитель
Коммит
4f7ce9c195
4 измененных файла: 73 добавлено и 149 удалено
  1. 0 1
      README.md
  2. 3 13
      github.go
  3. 20 44
      gitleaks_test.go
  4. 50 91
      main.go

+ 0 - 1
README.md

@@ -68,7 +68,6 @@ Application Options:
       --github-org=    Github organization to audit
       --github-url=    GitHub API Base URL, use for GitHub Enterprise. Example: https://github.example.com/api/v3/ (default: https://api.github.com/)
       --github-pr=     Github PR url to audit. This does not clone the repo. GITHUB_TOKEN must be set
-  -p, --private        Include private repos in audit
   -b, --branch=        branch name to audit (defaults to HEAD)
   -c, --commit=        sha of commit to stop at
       --depth=         maximum commit depth

+ 3 - 13
github.go

@@ -117,11 +117,7 @@ func auditGithubRepos() ([]Leak, error) {
 			break
 		}
 		if opts.GithubUser != "" {
-			if opts.IncludePrivate {
-				pagedGithubRepos, resp, err = githubClient.Repositories.List(ctx, "", githubOptions)
-			} else {
-				pagedGithubRepos, resp, err = githubClient.Repositories.List(ctx, opts.GithubUser, githubOptions)
-			}
+			pagedGithubRepos, resp, err = githubClient.Repositories.List(ctx, opts.GithubUser, githubOptions)
 			if err != nil {
 				done = true
 			}
@@ -199,10 +195,7 @@ func cloneGithubRepo(githubRepo *github.Repository) (*RepoDescriptor, error) {
 		if err != nil {
 			return nil, fmt.Errorf("unable to generater owner temp dir: %v", err)
 		}
-		if opts.IncludePrivate {
-			if sshAuth == nil {
-				return nil, fmt.Errorf("no ssh auth available")
-			}
+		if sshAuth != nil {
 			repo, err = git.PlainClone(fmt.Sprintf("%s/%s", ownerDir, *githubRepo.Name), false, &git.CloneOptions{
 				URL:  *githubRepo.SSHURL,
 				Auth: sshAuth,
@@ -213,10 +206,7 @@ func cloneGithubRepo(githubRepo *github.Repository) (*RepoDescriptor, error) {
 			})
 		}
 	} else {
-		if opts.IncludePrivate {
-			if sshAuth == nil {
-				return nil, fmt.Errorf("no ssh auth available")
-			}
+		if sshAuth != nil {
 			repo, err = git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
 				URL:  *githubRepo.SSHURL,
 				Auth: sshAuth,

+ 20 - 44
gitleaks_test.go

@@ -167,17 +167,15 @@ func TestGetRepo(t *testing.T) {
 		},
 		{
 			testOpts: Options{
-				Repo:           "https://github.com/gitleakstest/private",
-				IncludePrivate: true,
+				Repo: "https://github.com/gitleakstest/private",
 			},
 			description:    "test private repo",
 			expectedErrMsg: "invalid auth method",
 		},
 		{
 			testOpts: Options{
-				Repo:           "https://github.com/gitleakstest/private",
-				IncludePrivate: true,
-				Disk:           true,
+				Repo: "https://github.com/gitleakstest/private",
+				Disk: true,
 			},
 			description:    "test private repo",
 			expectedErrMsg: "invalid auth method",
@@ -264,13 +262,12 @@ func TestRun(t *testing.T) {
 		},
 		{
 			testOpts: Options{
-				GithubOrg:      "gitleakstestorg",
-				IncludePrivate: true,
-				SSHKey:         "reallyreallyreallyreallywrongpath",
+				Repo:   "git@github.com:gitleakstest/gronit.git",
+				SSHKey: "trash",
 			},
-			description:    "test private org no ssh",
+			description:    "test leak",
 			numLeaks:       0,
-			expectedErrMsg: "unable to generate ssh key: open reallyreallyreallyreallywrongpath: no such file or directory",
+			expectedErrMsg: "unable to generate ssh key: open trash: no such file or directory",
 		},
 		{
 			testOpts: Options{
@@ -634,28 +631,28 @@ func TestAuditRepo(t *testing.T) {
 			testOpts: Options{
 				Entropy: 4.7,
 			},
-			numLeaks: 7,
+			numLeaks: 6,
 		},
 		{
 			repo:        leaksRepo,
 			description: "Audit until specific commit",
-			numLeaks:    1,
+			numLeaks:    2,
 			testOpts: Options{
 				Commit: "f6839959b7bbdcd23008f1fb16f797f35bcd3a0c",
 			},
 		},
 		{
 			repo:        leaksRepo,
-			description: "commit depth = 1, no leaks",
-			numLeaks:    0,
+			description: "commit depth = 1, one leak",
+			numLeaks:    1,
 			testOpts: Options{
 				Depth: 1,
 			},
 		},
 		{
 			repo:        leaksRepo,
-			description: "commit depth = 2, one leak",
-			numLeaks:    1,
+			description: "commit depth = 2, two leaks",
+			numLeaks:    2,
 			testOpts: Options{
 				Depth: 2,
 			},
@@ -663,7 +660,7 @@ func TestAuditRepo(t *testing.T) {
 		{
 			repo:        leaksRepo,
 			description: "toml entropy range",
-			numLeaks:    422,
+			numLeaks:    283,
 			configPath:  path.Join(configsDir, "entropy"),
 		},
 		{
@@ -751,45 +748,24 @@ func TestOptionGuard(t *testing.T) {
 		},
 		{
 			testOpts: Options{
-				IncludePrivate: true,
-				GithubOrg:      "fakeOrg",
-			},
-			description:    "private org no githubtoken",
-			expectedErrMsg: "user/organization private repos require env var GITHUB_TOKEN to be set",
-			githubToken:    false,
-		},
-		{
-			testOpts: Options{
-				IncludePrivate: true,
-				GithubUser:     "fakeUser",
-			},
-			description:    "private user no githubtoken",
-			expectedErrMsg: "user/organization private repos require env var GITHUB_TOKEN to be set",
-			githubToken:    false,
-		},
-		{
-			testOpts: Options{
-				IncludePrivate: true,
-				GithubUser:     "fakeUser",
-				GithubOrg:      "fakeOrg",
+				GithubUser: "fakeUser",
+				GithubOrg:  "fakeOrg",
 			},
 			description:    "double owner",
 			expectedErrMsg: "github user and organization set",
 		},
 		{
 			testOpts: Options{
-				IncludePrivate: true,
-				GithubOrg:      "fakeOrg",
-				OwnerPath:      "/dev/null",
+				GithubOrg: "fakeOrg",
+				OwnerPath: "/dev/null",
 			},
 			description:    "local and remote target",
 			expectedErrMsg: "github organization set and local owner path",
 		},
 		{
 			testOpts: Options{
-				IncludePrivate: true,
-				GithubUser:     "fakeUser",
-				OwnerPath:      "/dev/null",
+				GithubUser: "fakeUser",
+				OwnerPath:  "/dev/null",
 			},
 			description:    "local and remote target",
 			expectedErrMsg: "github user set and local owner path",

+ 50 - 91
main.go

@@ -60,12 +60,11 @@ type RepoDescriptor struct {
 // Options for gitleaks
 type Options struct {
 	// remote target options
-	Repo           string `short:"r" long:"repo" description:"Repo url to audit"`
-	GithubUser     string `long:"github-user" description:"Github user to audit"`
-	GithubOrg      string `long:"github-org" description:"Github organization to audit"`
-	GithubURL      string `long:"github-url" default:"https://api.github.com/" description:"GitHub API Base URL, use for GitHub Enterprise. Example: https://github.example.com/api/v3/"`
-	GithubPR       string `long:"github-pr" description:"Github PR url to audit. This does not clone the repo. GITHUB_TOKEN must be set"`
-	IncludePrivate bool   `short:"p" long:"private" description:"Include private repos in audit"`
+	Repo       string `short:"r" long:"repo" description:"Repo url to audit"`
+	GithubUser string `long:"github-user" description:"Github user to audit"`
+	GithubOrg  string `long:"github-org" description:"Github organization to audit"`
+	GithubURL  string `long:"github-url" default:"https://api.github.com/" description:"GitHub API Base URL, use for GitHub Enterprise. Example: https://github.example.com/api/v3/"`
+	GithubPR   string `long:"github-pr" description:"Github PR url to audit. This does not clone the repo. GITHUB_TOKEN must be set"`
 
 	/*
 		TODO:
@@ -137,7 +136,7 @@ type entropyRange struct {
 }
 
 const defaultGithubURL = "https://api.github.com/"
-const version = "1.15.0"
+const version = "1.16.0"
 const errExit = 2
 const leakExit = 1
 const defaultConfig = `
@@ -280,13 +279,11 @@ func run() ([]Leak, error) {
 	if err != nil {
 		return nil, err
 	}
-	if opts.IncludePrivate {
-		// if including private repos use ssh as authentication
-		sshAuth, err = getSSHAuth()
-		if err != nil {
-			return nil, err
-		}
+	sshAuth, err = getSSHAuth()
+	if err != nil {
+		return leaks, err
 	}
+
 	if opts.Disk {
 		// temporary directory where all the gitleaks plain clones will reside
 		dir, err = ioutil.TempDir("", "gitleaks")
@@ -319,8 +316,7 @@ func run() ([]Leak, error) {
 			leaks = append(leaksFromRepo, leaks...)
 		}
 	} else if opts.GithubOrg != "" || opts.GithubUser != "" {
-		// Audit a github owner -- a user or organization. If you want to include
-		// private repos you must pass a --private/-p option and have your ssh keys set
+		// Audit a github owner -- a user or organization.
 		leaks, err = auditGithubRepos()
 		if err != nil {
 			return leaks, err
@@ -355,9 +351,7 @@ func writeReport(leaks []Leak) error {
 	return err
 }
 
-// cloneRepo clones a repo to memory(default) or to disk if the --disk option is set. If you want to
-// clone a private repo you must set the --private/-p option, use a ssh target, and have your ssh keys
-// configured. If you want to audit a local repo, getRepo will load up a repo located at --repo-path
+// cloneRepo clones a repo to memory(default) or to disk if the --disk option is set.
 func cloneRepo() (*RepoDescriptor, error) {
 	var (
 		err  error
@@ -372,7 +366,7 @@ func cloneRepo() (*RepoDescriptor, error) {
 	if opts.Disk {
 		log.Infof("cloning %s", opts.Repo)
 		cloneTarget := fmt.Sprintf("%s/%x", dir, md5.Sum([]byte(fmt.Sprintf("%s%s", opts.GithubUser, opts.Repo))))
-		if opts.IncludePrivate {
+		if strings.HasPrefix(opts.Repo, "git") {
 			repo, err = git.PlainClone(cloneTarget, false, &git.CloneOptions{
 				URL:      opts.Repo,
 				Progress: os.Stdout,
@@ -385,11 +379,11 @@ func cloneRepo() (*RepoDescriptor, error) {
 			})
 		}
 	} else if opts.RepoPath != "" {
-		log.Infof("opening %s", opts.Repo)
+		log.Infof("opening %s", opts.RepoPath)
 		repo, err = git.PlainOpen(opts.RepoPath)
 	} else {
 		log.Infof("cloning %s", opts.Repo)
-		if opts.IncludePrivate {
+		if strings.HasPrefix(opts.Repo, "git") {
 			repo, err = git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
 				URL:      opts.Repo,
 				Progress: os.Stdout,
@@ -473,95 +467,58 @@ func auditGitRepo(repo *RepoDescriptor) ([]Leak, error) {
 }
 
 // auditGitReference beings the audit for a git reference. This function will
-// traverse the git reference and audit each line of each diff. Set maximum concurrency with
-// the --max-go option (default is set to the number of cores on your cpu).
+// traverse the git reference and audit each line of each diff.
 func auditGitReference(repo *RepoDescriptor, ref *plumbing.Reference) []Leak {
 	var (
 		err         error
-		prevCommit  *object.Commit
-		semaphore   chan bool
 		repoName    string
 		leaks       []Leak
+		commitCount int
 		commitWg    sync.WaitGroup
 		mutex       = &sync.Mutex{}
-		commitCount int
+		semaphore   chan bool
 	)
 	repoName = repo.name
 	if opts.MaxGoRoutines != 0 {
 		maxGo = opts.MaxGoRoutines
 	}
-
+	if opts.RepoPath != "" {
+		maxGo = 1
+	}
 	semaphore = make(chan bool, maxGo)
+
 	cIter, err := repo.repository.Log(&git.LogOptions{From: ref.Hash()})
 	if err != nil {
 		return nil
 	}
 	err = cIter.ForEach(func(c *object.Commit) error {
-		if c.Hash.String() == opts.Commit || (opts.Depth != 0 && commitCount == opts.Depth) {
+		if c == nil || c.Hash.String() == opts.Commit || (opts.Depth != 0 && commitCount == opts.Depth) {
 			cIter.Close()
 			return errors.New("ErrStop")
 		}
 		commitCount = commitCount + 1
 		totalCommits = totalCommits + 1
 		if whiteListCommits[c.Hash.String()] {
-			prevCommit = c
 			log.Infof("skipping commit: %s\n", c.Hash.String())
 			return nil
 		}
-		if prevCommit != nil {
-			if whiteListCommits[prevCommit.Hash.String()] {
-				prevCommit = c
-				log.Infof("skipping commit: %s\n", c.Hash.String())
-				return nil
-			}
-		}
 
-		commitWg.Add(1)
-		semaphore <- true
-		go func(c *object.Commit, prevCommit *object.Commit) {
-			var (
-				filePath string
-				skipFile bool
-			)
-			defer func() {
-				commitWg.Done()
-				<-semaphore
-				if r := recover(); r != nil {
-					log.Warnf("recoverying from panic on commit %s, likely large diff causing panic", c.Hash.String())
-				}
-			}()
-
-			if prevCommit == nil {
-				t, _ := c.Tree()
-				files := t.Files()
-				err := files.ForEach(func(file *object.File) error {
-					content, err := file.Contents()
-					if err != nil {
-						return err
+		err = c.Parents().ForEach(func(parent *object.Commit) error {
+			commitWg.Add(1)
+			semaphore <- true
+			go func(c *object.Commit, parent *object.Commit) {
+				var (
+					filePath string
+					skipFile bool
+				)
+				defer func() {
+					commitWg.Done()
+					<-semaphore
+					if r := recover(); r != nil {
+						log.Warnf("recoverying from panic on commit %s, likely large diff causing panic", c.Hash.String())
 					}
-					diff := gitDiff{
-						branchName: string(ref.Name()),
-						repoName:   repoName,
-						filePath:   file.Name,
-						content:    content,
-						sha:        c.Hash.String(),
-						author:     c.Author.String(),
-						message:    c.Message,
-					}
-					chunkLeaks := inspect(diff)
-					for _, leak := range chunkLeaks {
-						mutex.Lock()
-						leaks = append(leaks, leak)
-						mutex.Unlock()
-					}
-					return nil
-				})
-				if err != nil {
-					log.Warnf("problem generating diff for commit: %s\n", c.Hash.String())
-					return
-				}
-			} else {
-				patch, err := c.Patch(prevCommit)
+				}()
+				patch, err := c.Patch(parent)
 				if err != nil {
 					log.Warnf("problem generating patch for commit: %s\n", c.Hash.String())
 					return
@@ -592,9 +549,9 @@ func auditGitReference(repo *RepoDescriptor, ref *plumbing.Reference) []Leak {
 								repoName:   repoName,
 								filePath:   filePath,
 								content:    chunk.Content(),
-								sha:        prevCommit.Hash.String(),
-								author:     prevCommit.Author.String(),
-								message:    prevCommit.Message,
+								sha:        c.Hash.String(),
+								author:     c.Author.String(),
+								message:    c.Message,
 							}
 							chunkLeaks := inspect(diff)
 							for _, leak := range chunkLeaks {
@@ -605,9 +562,9 @@ func auditGitReference(repo *RepoDescriptor, ref *plumbing.Reference) []Leak {
 						}
 					}
 				}
-			}
-		}(c, prevCommit)
-		prevCommit = c
+			}(c, parent)
+			return nil
+		})
 		return nil
 	})
 	commitWg.Wait()
@@ -773,8 +730,6 @@ func optsGuard() error {
 		return fmt.Errorf("github organization set and local owner path")
 	} else if opts.GithubUser != "" && opts.OwnerPath != "" {
 		return fmt.Errorf("github user set and local owner path")
-	} else if opts.IncludePrivate && os.Getenv("GITHUB_TOKEN") == "" && (opts.GithubOrg != "" || opts.GithubUser != "") {
-		return fmt.Errorf("user/organization private repos require env var GITHUB_TOKEN to be set")
 	}
 
 	// do the URL Parse and error checking here, so we can skip it later
@@ -926,9 +881,13 @@ func getSSHAuth() (*ssh.PublicKeys, error) {
 	}
 	sshAuth, err := ssh.NewPublicKeysFromFile("git", sshKeyPath, "")
 	if err != nil {
-		return nil, fmt.Errorf("unable to generate ssh key: %v", err)
+		if strings.HasPrefix(opts.Repo, "git") {
+			// if you are attempting to clone a git repo via ssh and supply a bad ssh key,
+			// the clone will fail.
+			return nil, fmt.Errorf("unable to generate ssh key: %v", err)
+		}
 	}
-	return sshAuth, err
+	return sshAuth, nil
 }
 
 func (leak Leak) log() {