Răsfoiți Sursa

init 0.2.0 work

zricethezav 8 ani în urmă
părinte
comite
1b06f1a0dc
7 a modificat fișierele cu 297 adăugiri și 170 ștergeri
  1. 28 0
      CHANGELOG.md
  2. 3 3
      checks.go
  3. 11 8
      checks_test.go
  4. 62 50
      leaks.go
  5. 0 54
      leaks_test.go
  6. 170 49
      main.go
  7. 23 6
      options.go

+ 28 - 0
CHANGELOG.md

@@ -0,0 +1,28 @@
+CHANGELOG
+=========
+
+0.2.0
+-----
+Version 0.2.0 of Gitleaks is the first version update since this got relatively popular. Based on the issues raised it seems that folks want better support for integration into their pipelines. I hear ya. This is what this update tries to provide. So... what are the changes?
+
+* Additional regex checking
+* $HOME/.gitleaks/ directory
+* Clone into temp dir option
+* Persistent repos for Orgs and Users (no more re-cloning)
+* Pagination for Org/User repo lists... no more partial repo lists
+* Since commit option
+* Updated README
+* Multi-staged Docker build
+* Travis tests
+* More tests
+
+
+
+0.1.0
+-----
+
+Version 0.1.0 of Gitleaks demonstrates:
+
+* full git history search
+* regex/entropy checks
+* report generation

+ 3 - 3
checks.go

@@ -6,7 +6,7 @@ import (
 )
 
 // checks Regex and if enabled, entropy and stopwords
-func doChecks(diff string, commit string) []LeakElem {
+func doChecks(diff string, commit string, opts *Options) []LeakElem {
 	var match string
 	var leaks []LeakElem
 	var leak LeakElem
@@ -16,7 +16,7 @@ func doChecks(diff string, commit string) []LeakElem {
 			match = re.FindString(line)
 			if len(match) == 0 ||
 				(opts.Strict && containsStopWords(line)) ||
-				(opts.Entropy && !checkShannonEntropy(line)) {
+				(opts.Entropy && !checkShannonEntropy(line, opts)) {
 				continue
 			}
 
@@ -35,7 +35,7 @@ func doChecks(diff string, commit string) []LeakElem {
 }
 
 // checkShannonEntropy checks entropy of target
-func checkShannonEntropy(target string) bool {
+func checkShannonEntropy(target string, opts *Options) bool {
 	var (
 		sum             float64
 		targetBase64Len int

+ 11 - 8
checks_test.go

@@ -4,24 +4,21 @@ import (
 	"testing"
 )
 
-func init() {
-	opts = &Options{
+func TestCheckRegex(t *testing.T) {
+	var results []LeakElem
+	opts := &Options{
 		Concurrency:      10,
 		B64EntropyCutoff: 70,
 		HexEntropyCutoff: 40,
 		Entropy:          false,
 	}
-}
-
-func TestCheckRegex(t *testing.T) {
-	var results []LeakElem
 	checks := map[string]int{
 		"aws=\"AKIALALEMEL33243OLIAE": 1,
 		"aws\"afewafewafewafewaf\"":   0,
 	}
 
 	for k, v := range checks {
-		results = doChecks(k, "commit")
+		results = doChecks(k, "commit", opts)
 		if v != len(results) {
 			t.Errorf("regexCheck failed on string %s", k)
 		}
@@ -30,6 +27,12 @@ func TestCheckRegex(t *testing.T) {
 
 func TestEntropy(t *testing.T) {
 	var enoughEntropy bool
+	opts := &Options{
+		Concurrency:      10,
+		B64EntropyCutoff: 70,
+		HexEntropyCutoff: 40,
+		Entropy:          false,
+	}
 	checks := map[string]bool{
 		"reddit_api_secret = settings./.http}":           false,
 		"heroku_client_secret = simple":                  false,
@@ -37,7 +40,7 @@ func TestEntropy(t *testing.T) {
 		"aws_secret= \"AKIAIMNOJVGFDXXFE4OA\"":           true,
 	}
 	for k, v := range checks {
-		enoughEntropy = checkShannonEntropy(k)
+		enoughEntropy = checkShannonEntropy(k, opts)
 		if v != enoughEntropy {
 			t.Errorf("checkEntropy failed for %s. Expected %t, got %t", k, v, enoughEntropy)
 		}

+ 62 - 50
leaks.go

@@ -9,7 +9,7 @@ import (
 	"os"
 	"os/exec"
 	"os/signal"
-	"strings"
+	"path/filepath"
 	"sync"
 	"syscall"
 )
@@ -22,64 +22,74 @@ type LeakElem struct {
 	Reason   string `json:"reason"`
 }
 
-// start clones and determines if there are any leaks
-func start(opts *Options) {
+func start(repos []RepoDesc, owner *Owner, opts *Options) {
+	var report []LeakElem
+
+	// interrupt handling
 	c := make(chan os.Signal, 2)
 	signal.Notify(c, os.Interrupt, syscall.SIGTERM)
-
-	fmt.Printf("Cloning \x1b[37;1m%s\x1b[0m...\n", opts.RepoURL)
-	err := exec.Command("git", "clone", opts.RepoURL).Run()
-	if err != nil {
-		log.Printf("failed to clone repo %v", err)
-		return
-	}
-	fmt.Printf("Evaluating \x1b[37;1m%s\x1b[0m...\n", opts.RepoURL)
-	repoName := getLocalRepoName(opts.RepoURL)
-	if err = os.Chdir(repoName); err != nil {
-		log.Fatal(err)
-	}
 	go func() {
 		<-c
-		cleanup(repoName)
-		os.Exit(1)
+		if opts.Tmp {
+			err := os.RemoveAll(owner.path)
+			if err != nil {
+				log.Printf("failed to properly remove tmp gitleaks dir: %v", err)
+				// exit code?
+			}
+			os.Exit(1)
+		}
 	}()
 
-	report := getLeaks(repoName, opts)
-	if len(report) == 0 {
-		fmt.Printf("No Leaks detected for \x1b[35;2m%s\x1b[0m...\n\n", opts.RepoURL)
-	}
-	cleanup(repoName)
-	reportJSON, _ := json.MarshalIndent(report, "", "\t")
-	err = ioutil.WriteFile(fmt.Sprintf("%s_leaks.json", repoName), reportJSON, 0644)
-	if err != nil {
-		log.Fatalf("Can't write to file: %s", err)
-	}
-}
+	// run checks on repos
+	for _, repo := range repos {
+		// change to owner root
+		if err := os.Chdir(fmt.Sprintf(owner.path)); err != nil {
+			log.Fatal(err)
+		}
 
-// getLocalRepoName generates the name of the local clone folder based on the given URL
-func getLocalRepoName(url string) string {
-	splitSlashes := strings.Split(url, "/")
-	name := splitSlashes[len(splitSlashes)-1]
-	name = strings.TrimSuffix(name, ".git")
-	splitColons := strings.Split(name, ":")
-	name = splitColons[len(splitColons)-1]
+		dotGitPath := filepath.Join(repo.path, ".git")
+		if _, err := os.Stat(dotGitPath); err == nil {
+			report = getLeaks(repo, opts)
+		} else {
+			fmt.Printf("Cloning \x1b[37;1m%s\x1b[0m...\n", repo.url)
+			err := exec.Command("git", "clone", repo.url).Run()
+			if err != nil {
+				log.Printf("failed to clone repo %v", err)
+				return
+			}
+			report = getLeaks(repo, opts)
+		}
+
+		if len(report) == 0 {
+			fmt.Printf("No Leaks detected for \x1b[35;2m%s\x1b[0m...\n\n", repo.url)
+		}
+		fmt.Println(opts.EnableJSON)
+		// write report
+		if opts.EnableJSON {
+			writeGitLeaksReport(report, repo, opts)
+		}
 
-	return name
+	}
 }
 
-// cleanup deletes the repo
-func cleanup(repoName string) {
-	if err := os.Chdir(appRoot); err != nil {
-		log.Fatalf("failed cleaning up repo. Does the repo exist? %v", err)
+func writeGitLeaksReport(report []LeakElem, repo RepoDesc, opts *Options) {
+	fmt.Println("writing report")
+	reportJSON, _ := json.MarshalIndent(report, "", "\t")
+	if _, err := os.Stat(repo.owner.reportPath); os.IsNotExist(err) {
+		os.Mkdir(repo.owner.reportPath, os.ModePerm)
 	}
-	err := exec.Command("rm", "-rf", repoName).Run()
+
+	reportFileName := fmt.Sprintf("%s_leaks.json", repo.name)
+	reportFile := filepath.Join(repo.owner.reportPath, reportFileName)
+	err := ioutil.WriteFile(reportFile, reportJSON, 0644)
 	if err != nil {
-		log.Fatal(err)
+		log.Fatalf("Can't write to file: %s", err)
+
 	}
 }
 
 // getLeaks will attempt to find gitleaks
-func getLeaks(repoName string, opts *Options) []LeakElem {
+func getLeaks(repo RepoDesc, opts *Options) []LeakElem {
 	var (
 		out               []byte
 		err               error
@@ -102,6 +112,10 @@ func getLeaks(repoName string, opts *Options) []LeakElem {
 		}
 	}(&commitWG, &gitLeakReceiverWG)
 
+	if err := os.Chdir(fmt.Sprintf(repo.path)); err != nil {
+		log.Fatal(err)
+	}
+
 	out, err = exec.Command("git", "rev-list", "--all", "--remotes", "--topo-order").Output()
 	if err != nil {
 		log.Fatalf("error retrieving commits%v\n", err)
@@ -119,11 +133,10 @@ func getLeaks(repoName string, opts *Options) []LeakElem {
 
 		commitWG.Add(1)
 		go func(currCommit string, repoName string, commitWG *sync.WaitGroup,
-			gitLeakReceiverWG *sync.WaitGroup) {
+			gitLeakReceiverWG *sync.WaitGroup, opts *Options) {
 
 			defer commitWG.Done()
-
-			if err := os.Chdir(fmt.Sprintf("%s/%s", appRoot, repoName)); err != nil {
+			if err := os.Chdir(fmt.Sprintf(repo.path)); err != nil {
 				log.Fatal(err)
 			}
 
@@ -134,11 +147,10 @@ func getLeaks(repoName string, opts *Options) []LeakElem {
 
 			if err != nil {
 				fmt.Printf("error retrieving diff for commit %s try turning concurrency factor down %v\n", currCommit, err)
-				cleanup(repoName)
-				return
+				log.Fatal(err)
 			}
 
-			leaks := doChecks(string(out), currCommit)
+			leaks := doChecks(string(out), currCommit, opts)
 			if len(leaks) == 0 {
 				return
 			}
@@ -147,7 +159,7 @@ func getLeaks(repoName string, opts *Options) []LeakElem {
 				gitLeaks <- leak
 			}
 
-		}(currCommit, repoName, &commitWG, &gitLeakReceiverWG)
+		}(currCommit, repo.name, &commitWG, &gitLeakReceiverWG, opts)
 	}
 
 	commitWG.Wait()

+ 0 - 54
leaks_test.go

@@ -1,54 +0,0 @@
-package main
-
-import "testing"
-
-func TestGetLocalRepoName(t *testing.T) {
-	cases := []struct {
-		name     string
-		input    string
-		expected string
-	}{
-		{
-			"Usual github url",
-			"https://github.com/usual/url",
-			"url",
-		},
-		{
-			"Usual github url with .git suffix",
-			"https://github.com/usual/url.git",
-			"url",
-		},
-		{
-			"personal git url",
-			"git@github.com:url.git",
-			"url",
-		},
-		{
-			"personal git url in sub folder",
-			"git@github.com:sub/url.git",
-			"url",
-		},
-		{
-			"ssh git url with port",
-			"ssh://git@github.com:2222/sub/url.git",
-			"url",
-		},
-		{
-			"local git in sub folder",
-			"local/url.git",
-			"url",
-		},
-		{
-			"local git in same folder",
-			"url.git",
-			"url",
-		},
-	}
-
-	for _, c := range cases {
-		actual := getLocalRepoName(c.input)
-		if actual != c.expected {
-			t.Errorf("'%s' failed. Input: '%s'; Expected: '%s'; Got: '%s'", c.input, c.name, c.expected, actual)
-		}
-	}
-}

+ 170 - 49
main.go

@@ -1,39 +1,47 @@
 package main
 
 import (
-	"encoding/json"
-	"fmt"
+	"context"
+	"github.com/google/go-github/github"
+	"github.com/mitchellh/go-homedir"
+	"golang.org/x/oauth2"
+	"io/ioutil"
 	"log"
 	"net/http"
 	"os"
+	"path"
+	"path/filepath"
 	"regexp"
 	"strings"
 )
 
 var (
-	appRoot     string
-	regexes     map[string]*regexp.Regexp
-	stopWords   []string
-	base64Chars string
-	hexChars    string
-	opts        *Options
-	assignRegex *regexp.Regexp
+	regexes      map[string]*regexp.Regexp
+	stopWords    []string
+	base64Chars  string
+	hexChars     string
+	assignRegex  *regexp.Regexp
+	gitLeaksPath string
+	gitLeaksClonePath string
+	gitLeaksReportPath string
 )
 
-// RepoElem used for parsing json from github api
-type RepoElem struct {
-	RepoURL string `json:"html_url"`
+type RepoDesc struct {
+	name  string
+	url   string
+	path  string
+	owner *Owner
 }
 
-func init() {
-	var (
-		err error
-	)
+type Owner struct {
+	name        string
+	url         string
+	accountType string
+	path        string
+	reportPath string
+}
 
-	appRoot, err = os.Getwd()
-	if err != nil {
-		log.Fatalf("Can't get working dir: %s", err)
-	}
+func init() {
 	base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
 	hexChars = "1234567890abcdefABCDEF"
 
@@ -52,46 +60,159 @@ func init() {
 		// "Custom": regexp.MustCompile(".*")
 	}
 	assignRegex = regexp.MustCompile(`(=|:|:=|<-)`)
+	homeDir, err := homedir.Dir()
+	if err != nil {
+		log.Fatal("Cant find home dir")
+	}
+
+	gitLeaksPath = filepath.Join(homeDir, ".gitleaks")
+	if _, err := os.Stat(gitLeaksPath); os.IsNotExist(err) {
+		os.Mkdir(gitLeaksPath, os.ModePerm)
+	}
+	gitLeaksClonePath = filepath.Join(gitLeaksPath, "clones")
+	if _, err := os.Stat(gitLeaksClonePath); os.IsNotExist(err) {
+		os.Mkdir(gitLeaksClonePath, os.ModePerm)
+	}
+	gitLeaksReportPath = filepath.Join(gitLeaksPath, "report")
+	if _, err := os.Stat(gitLeaksReportPath); os.IsNotExist(err) {
+		os.Mkdir(gitLeaksReportPath, os.ModePerm)
+	}
 }
 
-func main() {
-	args := os.Args[1:]
-	opts = parseOptions(args)
+func getOwner(opts *Options) *Owner {
+	var owner Owner
 	if opts.RepoURL != "" {
-		start(opts)
-	} else if opts.UserURL != "" || opts.OrgURL != "" {
-		repoList := repoScan(opts)
-		for _, repo := range repoList {
-			opts.RepoURL = repo.RepoURL
-			start(opts)
+		splitSlashes := strings.Split(opts.RepoURL, "/")
+		owner = Owner{
+			name:        splitSlashes[len(splitSlashes)-2],
+			url:         opts.RepoURL,
+			accountType: "users",
+		}
+
+	} else if opts.UserURL != "" {
+		_, ownerName := path.Split(opts.UserURL)
+		owner = Owner{
+			name:        ownerName,
+			url:         opts.UserURL,
+			accountType: "user",
+		}
+	} else if opts.OrgURL != "" {
+		_, ownerName := path.Split(opts.OrgURL)
+		owner = Owner{
+			name:        ownerName,
+			url:         opts.OrgURL,
+			accountType: "org",
 		}
 	}
+
+	if opts.Tmp {
+		dir, err := ioutil.TempDir("", owner.name)
+		if err != nil {
+			log.Fatal("Cant make temp dir")
+		}
+		owner.path = dir
+	} else {
+		owner.path = filepath.Join(gitLeaksClonePath, owner.name)
+		if _, err := os.Stat(owner.path); os.IsNotExist(err) {
+			os.Mkdir(owner.path, os.ModePerm)
+		}
+	}
+	owner.reportPath = filepath.Join(gitLeaksPath, "report", owner.name)
+	return &owner
 }
 
-// repoScan attempts to parse all repo urls from an organization or user
-func repoScan(opts *Options) []RepoElem {
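+// getRepos builds the list of repos to scan: the single repo named by opts.RepoURL, or the owner's repos listed page by page through the GitHub API.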
+func getRepos(opts *Options, owner *Owner) []RepoDesc {
 	var (
-		targetURL  string
-		target     string
-		targetType string
-		repoList   []RepoElem
+		allRepos  []*github.Repository
+		repos     []*github.Repository
+		repoDescs []RepoDesc
+		resp      *github.Response
+		ctx       = context.Background()
+		err       error
 	)
+	if opts.RepoURL != "" {
+		_, repoName := path.Split(opts.RepoURL)
+		if strings.HasSuffix(repoName, ".git") {
+			repoName = repoName[:len(repoName)-4]
+		}
+		ownerPath := filepath.Join(owner.path, repoName)
+		repo := RepoDesc{
+			name:  repoName,
+			url:   opts.RepoURL,
+			owner: owner,
+			path:  ownerPath}
+		repoDescs = append(repoDescs, repo)
+		return repoDescs
+	}
 
-	if opts.UserURL != "" {
-		targetURL = opts.UserURL
-		targetType = "users"
-	} else {
-		targetURL = opts.OrgURL
-		targetType = "orgs"
+	tokenClient := getAccessToken(opts)
+	gitClient := github.NewClient(tokenClient)
+
+	// TODO include fork check
+	orgOpt := &github.RepositoryListByOrgOptions{
+		ListOptions: github.ListOptions{PerPage: 10},
+	}
+	userOpt := &github.RepositoryListOptions{
+		ListOptions: github.ListOptions{PerPage: 10},
 	}
-	splitTargetURL := strings.Split(targetURL, "/")
-	target = splitTargetURL[len(splitTargetURL)-1]
 
-	resp, err := http.Get(fmt.Sprintf("https://api.github.com/%s/%s/repos", targetType, target))
-	if err != nil {
-		log.Fatal(err)
+	for {
+		if opts.UserURL != "" {
+			repos, resp, err = gitClient.Repositories.List(
+				ctx, owner.name, userOpt)
+		} else if opts.OrgURL != "" {
+			repos, resp, err = gitClient.Repositories.ListByOrg(
+				ctx, owner.name, orgOpt)
+		}
+		allRepos = append(allRepos, repos...)
+		if resp.NextPage == 0 || err != nil {
+			break
+		}
+
+		for _, repo := range repos {
+			repoPath := filepath.Join(owner.path, *repo.Name)
+			repoDescs = append(repoDescs,
+				RepoDesc{
+					name:  *repo.Name,
+					url:   *repo.CloneURL,
+					owner: owner,
+					path:  repoPath})
+		}
+
+		orgOpt.Page = resp.NextPage
+		userOpt.Page = resp.NextPage
 	}
-	defer resp.Body.Close()
-	json.NewDecoder(resp.Body).Decode(&repoList)
-	return repoList
+
+	return repoDescs
+}
+
+// getAccessToken checks
+// 1. option
+// 2. env var
+// TODO. $HOME/.gitleaks/.creds
+func getAccessToken(opts *Options) *http.Client {
+	var token string
+	if opts.Token != "" {
+		token = opts.Token
+	} else {
+		token = os.Getenv("GITHUB_TOKEN")
+	}
+	if token == "" {
+		return nil
+	}
+
+	tokenService := oauth2.StaticTokenSource(
+		&oauth2.Token{AccessToken: token},
+	)
+	tokenClient := oauth2.NewClient(context.Background(), tokenService)
+	return tokenClient
+}
+
+func main() {
+	args := os.Args[1:]
+	opts := parseOptions(args)
+	owner := getOwner(opts)
+	repos := getRepos(opts, owner)
+	start(repos, owner, opts)
 }

+ 23 - 6
options.go

@@ -9,10 +9,10 @@ import (
 const usage = `usage: gitleaks [options] <url>
 
 Options:
- -c 			Concurrency factor (default is 10)
- -u --user 		Git user url
- -r --repo 		Git repo url
- -o --org 		Git organization url
+ -c --concurrency 	Concurrency factor (default is 10)
+ -u --user 			Git user url
+ -r --repo 			Git repo url
+ -o --org 			Git organization url
  -s --since 		Commit to stop at
  -b --b64Entropy 	Base64 entropy cutoff (default is 70)
  -x --hexEntropy  	Hex entropy cutoff (default is 40)
@@ -32,6 +32,12 @@ type Options struct {
 	Strict           bool
 	Entropy          bool
 	SinceCommit      string
+	Persist          bool
+	IncludeForks     bool
+	Tmp              bool
+	EnableJSON       bool
+	Token            string
+	Verbose 		 bool
 }
 
 // help prints the usage string and exits
@@ -72,7 +78,6 @@ func parseOptions(args []string) *Options {
 		Concurrency:      10,
 		B64EntropyCutoff: 70,
 		HexEntropyCutoff: 40,
-		Entropy:          false,
 	}
 
 	if len(args) == 0 {
@@ -92,14 +97,26 @@ func parseOptions(args []string) *Options {
 			opts.HexEntropyCutoff = optionsNextInt(args, &i)
 		case "-e", "--entropy":
 			opts.Entropy = true
-		case "-c":
+		case "-c", "--concurrency":
 			opts.Concurrency = optionsNextInt(args, &i)
 		case "-o", "--org":
 			opts.OrgURL = optionsNextString(args, &i)
 		case "-u", "--user":
 			opts.UserURL = optionsNextString(args, &i)
+		case "-p", "--persist":
+			opts.UserURL = optionsNextString(args, &i)
 		case "-r", "--repo":
 			opts.RepoURL = optionsNextString(args, &i)
+		case "-f", "--forks":
+			opts.IncludeForks = true
+		case "-t", "--temporary":
+			opts.Tmp = true
+		case "-gt", "--token":
+			opts.Token = optionsNextString(args, &i)
+		case "-j", "--json":
+			opts.EnableJSON = true
+		case "-v", "--verbose":
+			opts.Verbose = true
 		case "-h", "--help":
 			help()
 			return nil