Explorar el Código

Merge pull request #1 from zricethezav/dev/concurrency

Dev/concurrency
Zachary Rice hace 8 años
padre
commit
132e34e86d
Se han modificado 5 ficheros con 160 adiciones y 164 borrados
  1. 1 33
      checks.go
  2. 131 0
      leaks.go
  3. 6 14
      main.go
  4. 22 17
      options.go
  5. 0 100
      repo.go

+ 1 - 33
checks.go

@@ -1,41 +1,10 @@
 package main
 
 import (
-	_ "fmt"
 	"github.com/nbutton23/zxcvbn-go"
-	"os/exec"
 	"strings"
 )
 
-// checkDiff operates on a single diff between two chronological commits
-func checkDiff(commit1 string, commit2 string) []string {
-	var leakPrs bool
-	var leaks []string
-	_, seen := cache[commit1+commit2]
-	if seen {
-		return []string{}
-	}
-
-	out, err := exec.Command("git", "diff", commit1, commit2).Output()
-	if err != nil {
-		return []string{}
-	}
-
-	cache[commit1+commit2] = true
-	lines := checkRegex(string(out))
-	if len(lines) == 0 {
-		return []string{}
-	}
-
-	for _, line := range lines {
-		leakPrs = checkEntropy(line)
-		if leakPrs {
-			leaks = append(leaks, line)
-		}
-	}
-	return leaks
-}
-
 // check each line of a diff and see if there are any potential secrets
 // [1] https://people.eecs.berkeley.edu/~rohanpadhye/files/key_leaks-msr15.pdf
 func checkRegex(diff string) []string {
@@ -44,7 +13,7 @@ func checkRegex(diff string) []string {
 	lines := strings.Split(diff, "\n")
 	for _, line := range lines {
 		// doubtful a leak would be on a line > 120 characters
-		if len(line) == 0 || len(line) > 80 {
+		if len(line) == 0 || len(line) > 120 {
 			continue
 		}
 		for _, re := range regexes {
@@ -75,7 +44,6 @@ func checkEntropy(target string) bool {
 		return false
 	}
 
-	// entropy := shannonEntropy(target)
 	entropy := zxcvbn.PasswordStrength(target, nil).Entropy
 
 	// tune this/make option

+ 131 - 0
leaks.go

@@ -0,0 +1,131 @@
+package main
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"os"
+	"os/exec"
+	"os/signal"
+	"strings"
+	"sync"
+	"syscall"
+)
+
+type ReportElem struct {
+	Lines  []string `json:"lines"`
+	Commit string   `json:"commit"`
+}
+
+type GitLeak struct {
+	leaks  []string
+	commit string
+}
+
+func start(opts *Options, repoUrl string) {
+	c := make(chan os.Signal, 2)
+	signal.Notify(c, os.Interrupt, syscall.SIGTERM)
+
+	err := exec.Command("git", "clone", repoUrl).Run()
+	if err != nil {
+		log.Fatalf("failed to clone repo %v", err)
+	}
+	repoName := strings.Split(repoUrl, "/")[4]
+	if err := os.Chdir(repoName); err != nil {
+		log.Fatal(err)
+	}
+	go func() {
+		<-c
+		cleanup(repoName)
+		os.Exit(1)
+	}()
+
+	report := getLeaks(repoName)
+	cleanup(repoName)
+
+	reportJson, _ := json.MarshalIndent(report, "", "\t")
+	err = ioutil.WriteFile(fmt.Sprintf("%s_leaks.json", repoName), reportJson, 0644)
+}
+
+// cleanup changes back to the app root and recursively removes the target repo
+func cleanup(repoName string) {
+	if err := os.Chdir(appRoot); err != nil {
+		log.Fatalf("failed cleaning up repo. Does the repo exist? %v", err)
+	}
+	err := exec.Command("rm", "-rf", repoName).Run()
+	if err != nil {
+		log.Fatal(err)
+	}
+}
+
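+// getLeaks lists every commit with git rev-list, diffs each one concurrently, and returns the lines flagged by the regex and entropy checks, grouped by commit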
+func getLeaks(repoName string) []ReportElem {
+	var (
+		out           []byte
+		err           error
+		wg            sync.WaitGroup
+		concurrent    = 100
+		semaphoreChan = make(chan struct{}, concurrent)
+		gitLeaks      = make(chan GitLeak)
+	)
+
+	out, err = exec.Command("git", "rev-list", "--all", "--remotes", "--topo-order").Output()
+	if err != nil {
+		log.Fatalf("error retrieving commits%v\n", err)
+	}
+
+	commits := bytes.Split(out, []byte("\n"))
+	for j, currCommitB := range commits {
+		currCommit := string(currCommitB)
+		if j == len(commits)-2 {
+			break
+		}
+
+		wg.Add(1)
+		go func(currCommit string, repoName string) {
+			defer wg.Done()
+			var leakPrs bool
+			var leaks []string
+
+			if err := os.Chdir(fmt.Sprintf("%s/%s", appRoot, repoName)); err != nil {
+				log.Fatal(err)
+			}
+
+			commitCmp := fmt.Sprintf("%s^!", currCommit)
+			semaphoreChan <- struct{}{}
+			out, err := exec.Command("git", "diff", commitCmp).Output()
+			<-semaphoreChan
+
+			if err != nil {
+				return
+			}
+			lines := checkRegex(string(out))
+			if len(lines) == 0 {
+				return
+			}
+
+			for _, line := range lines {
+				leakPrs = checkEntropy(line)
+				if leakPrs {
+					leaks = append(leaks, line)
+				}
+			}
+
+			gitLeaks <- GitLeak{leaks, currCommit}
+
+		}(currCommit, repoName)
+	}
+	go func() {
+		for gitLeak := range gitLeaks {
+			if len(gitLeak.leaks) != 0 {
+				fmt.Println(gitLeak.leaks)
+				report = append(report, ReportElem{gitLeak.leaks, gitLeak.commit})
+			}
+		}
+	}()
+	wg.Wait()
+
+	return report
+}

+ 6 - 14
main.go

@@ -5,9 +5,7 @@ import (
 	"regexp"
 )
 
-
 var (
-	cache       map[string]bool
 	appRoot     string
 	regexes     map[string]*regexp.Regexp
 	assignRegex *regexp.Regexp
@@ -16,9 +14,9 @@ var (
 
 func init() {
 	appRoot, _ = os.Getwd()
-	cache = make(map[string]bool)
 	// TODO update regex to look for things like:
-	// client("fewafewakwafejwkaf",
+	// TODO ability to add/filter regex
+	// client("AKAI32fJ334...",
 	regexes = map[string]*regexp.Regexp{
 		"github":   regexp.MustCompile(`[g|G][i|I][t|T][h|H][u|U][b|B].*(=|:|:=|<-).*\w+.*`),
 		"aws":      regexp.MustCompile(`[a|A][w|W][s|S].*(=|:=|:|<-).*\w+.*`),
@@ -32,14 +30,8 @@ func init() {
 }
 
 func main() {
-	args := os.Args[1:]
-	opts := parseOptions(args)
-	start(opts)
+	args := os.Args[2:]
+	repoUrl := os.Args[1]
+	opts := parseOptions(args, repoUrl)
+	start(opts, repoUrl)
 }
-
-func start(opts *Options) {
-	if opts.Repo != "" {
-		repoStart(opts.Repo)
-	}
-}
-

+ 22 - 17
options.go

@@ -3,26 +3,21 @@ package main
 import (
 	"fmt"
 	"os"
+	"strconv"
 )
 
 // TODO regex on type.. user/organization can be treated as the same:
 // 	https://github.com/<user or org>
 // 	https://github.com/<user or org>/repo
-const usage = `usage: gogethunt [options]
+const usage = `usage: gitleaks [git link] [options]
 	
 Options:
-	-u --user		Target user
-	-r --repo 		Target repo
-	-o --org 		Target organization
-    -h --help 		Display this message
-	-e --entropy	Enable entropy detection
-	-r --regex 		Enable regex detection
+	-c 			Concurrency factor (potential number of git files open)
+	-h --help 		Display this message
 `
 
 type Options struct {
-	User string
-	Repo string
-	Org  string
+	Concurrency int
 }
 
 func help() {
@@ -30,6 +25,20 @@ func help() {
 	os.Exit(1)
 }
 
+func optionsNextInt(args []string, i *int) int {
+	if len(args) > *i+1 {
+		*i++
+	} else {
+		help()
+	}
+	argInt, err := strconv.Atoi(args[*i])
+	if err != nil {
+		fmt.Printf("Invalid %s option: %s\n", args[*i-1], args[*i])
+		help()
+	}
+	return argInt
+}
+
 func optionsNextString(args []string, i *int) string {
 	if len(args) > *i+1 {
 		*i++
@@ -39,17 +48,13 @@ func optionsNextString(args []string, i *int) string {
 	return args[*i]
 }
 
-func parseOptions(args []string) *Options {
+func parseOptions(args []string, repoUrl string) *Options {
 	opts := &Options{}
 	for i := 0; i < len(args); i++ {
 		arg := args[i]
 		switch arg {
-		case "-o", "--org":
-			opts.Org = optionsNextString(args, &i)
-		case "-r", "--repo":
-			opts.Repo = optionsNextString(args, &i)
-		case "-u", "--user":
-			opts.User = optionsNextString(args, &i)
+		case "-c":
+			opts.Concurrency = optionsNextInt(args, &i)
 		case "-h", "--help":
 			help()
 			return nil

+ 0 - 100
repo.go

@@ -1,100 +0,0 @@
-package main
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"io/ioutil"
-	"log"
-	"os"
-	"os/exec"
-	"strings"
-)
-
-type ReportElem struct {
-	Lines   []string `json:"lines"`
-	Branch  string   `json:"branch"`
-	CommitA string   `json:"commitA"`
-	CommitB string   `json:"commitB"`
-}
-
-type Repo struct {
-	url  string
-	name string
-	path string
-}
-
-func repoStart(repoUrl string) {
-	err := exec.Command("git", "clone", repoUrl).Run()
-	if err != nil {
-		log.Fatalf("failed to clone repo %v", err)
-	}
-	repoName := strings.Split(repoUrl, "/")[4]
-	if err := os.Chdir(repoName); err != nil {
-		log.Fatal(err)
-	}
-
-	repo := Repo{repoUrl, repoName, ""}
-	report := repo.audit()
-	repo.cleanup()
-
-	reportJson, _ := json.MarshalIndent(report, "", "\t")
-	err = ioutil.WriteFile(fmt.Sprintf("%s_leaks.json", repo.name), reportJson, 0644)
-}
-
-// cleanup changes to app root and recursive rms target repo
-func (repo Repo) cleanup() {
-	if err := os.Chdir(appRoot); err != nil {
-		log.Fatalf("failed cleaning up repo. Does the repo exist? %v", err)
-	}
-	err := exec.Command("rm", "-rf", repo.name).Run()
-	if err != nil {
-		log.Fatal(err)
-	}
-}
-
-// audit parses git branch --all
-func (repo Repo) audit() []ReportElem {
-	var (
-		out     []byte
-		err     error
-		branch  string
-		commits [][]byte
-		leaks   []string
-	)
-
-	out, err = exec.Command("git", "branch", "--all").Output()
-	if err != nil {
-		log.Fatalf("error retrieving branches %v\n", err)
-	}
-
-	// iterate through branches, git rev-list <branch>
-	branches := bytes.Split(out, []byte("\n"))
-	for i, branchB := range branches {
-		if i < 2 || i == len(branches)-1 {
-			continue
-		}
-		// if err := os.Chdir(repo.name); err != nil {
-		// 	log.Fatal(err)
-		// }
-		branch = string(bytes.Trim(branchB, " "))
-		out, err = exec.Command("git", "rev-list", branch).Output()
-		if err != nil {
-			continue
-		}
-		// iterate through commits
-		commits = bytes.Split(out, []byte("\n"))
-		for j, commitB := range commits {
-			if j == len(commits)-2 {
-				break
-			}
-
-			leaks = checkDiff(string(commitB), string(commits[j+1]))
-			if len(leaks) != 0 {
-				report = append(report, ReportElem{leaks, branch,
-					string(commitB), string(commits[j+1])})
-			}
-		}
-	}
-	return report
-}