Przeglądaj źródła

file, author, repourl, time, commitmsg to leakElem

zricethezav 8 lat temu
rodzic
commit
c104ff9157
3 zmienionych plików z 118 dodań i 45 usunięć
  1. 25 6
      checks.go
  2. 87 34
      leaks.go
  3. 6 5
      main.go

+ 25 - 6
checks.go

@@ -3,15 +3,30 @@ package main
 import (
 	"math"
 	"strings"
+	_"fmt"
+	"regexp"
 )
 
+
+// TODO LOCAL REPO!!!!
+
 // checks Regex and if enabled, entropy and stopwords
-func doChecks(diff string, commit string, opts *Options) []LeakElem {
-	var match string
-	var leaks []LeakElem
-	var leak LeakElem
+func doChecks(diff string, commit Commit, opts *Options, repo RepoDesc) []LeakElem {
+	var (
+		match string
+		leaks []LeakElem
+		leak  LeakElem
+	)
+
 	lines := strings.Split(diff, "\n")
+	file := ""
 	for _, line := range lines {
+		if strings.Contains(line, "diff --git a"){
+			re := regexp.MustCompile("diff --git a.+b/")
+			idx := re.FindStringIndex(line)
+			file = line[idx[1]:]
+		}
+
 		for leakType, re := range regexes {
 			match = re.FindString(line)
 			if len(match) == 0 ||
@@ -22,11 +37,15 @@ func doChecks(diff string, commit string, opts *Options) []LeakElem {
 
 			leak = LeakElem{
 				Line:     line,
-				Commit:   commit,
+				Commit:   commit.Hash,
 				Offender: match,
 				Reason:   leakType,
+				Msg: commit.Msg,
+				Time: commit.Time,
+				Author: commit.Author,
+				File: file,
+				RepoURL: repo.url,
 			}
-
 			leaks = append(leaks, leak)
 		}
 	}

+ 87 - 34
leaks.go

@@ -12,6 +12,7 @@ import (
 	"path/filepath"
 	"sync"
 	"syscall"
+	"strings"
 )
 
 // LeakElem contains the line and commit of a leak
@@ -20,10 +21,37 @@ type LeakElem struct {
 	Commit   string `json:"commit"`
 	Offender string `json:"string"`
 	Reason   string `json:"reason"`
+	Msg 	 string `json:"commitMsg"`
+	Time 	 string `json:"time"`
+	Author   string `json:"author"`
+	File     string `json:"file"`
+	RepoURL  string `json:"repoURL"`
 }
 
+// Commit holds the per-commit metadata that is copied into each LeakElem.
+// parseFormattedRevList fills these fields from the output of
+// `git rev-list` run with --format=%H%n%an%n%s%n%ci.
+type Commit struct {
+	Hash string // commit hash (%H)
+	Author string // author name (%an)
+	Time string // committer date as printed by %ci
+	Msg string // commit subject line (%s)
+}
+
+// rmTmp deletes the directory at owner.path when it can be stat'ed, logging
+// the cleanup and any removal failure, and then terminates the process with
+// exit status 1.
+//
+// NOTE(review): the exit is unconditional, so when this function is deferred
+// the process ends with a non-zero status as soon as the enclosing function
+// returns, even on the success path — confirm that this is intended.
+func rmTmp(owner *Owner) {
+	// Nothing to clean up when the path cannot be stat'ed; just exit.
+	if _, statErr := os.Stat(owner.path); statErr != nil {
+		os.Exit(1)
+	}
+
+	rmErr := os.RemoveAll(owner.path)
+	log.Printf("Cleaning up tmp repos in %s\n", owner.path)
+	if rmErr != nil {
+		log.Printf("failed to properly remove tmp gitleaks dir: %v", rmErr)
+	}
+	os.Exit(1)
+}
+
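+// start checks each repo for leaks, fetching repos that already have a .git
+// directory and cloning the rest, and outputs a JSON report per repo when
+// opts.EnableJSON is set.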
 func start(repos []RepoDesc, owner *Owner, opts *Options) {
 	var report []LeakElem
+	if opts.Tmp{
+		defer rmTmp(owner)
+	}
 
 	// interrupt handling
 	c := make(chan os.Signal, 2)
@@ -31,49 +59,52 @@ func start(repos []RepoDesc, owner *Owner, opts *Options) {
 	go func() {
 		<-c
 		if opts.Tmp {
-			err := os.RemoveAll(owner.path)
-			if err != nil {
-				log.Printf("failed to properly remove tmp gitleaks dir: %v", err)
-				// exit code?
-			}
-			os.Exit(1)
+			rmTmp(owner)
 		}
+		os.Exit(1)
 	}()
 
 	// run checks on repos
 	for _, repo := range repos {
-		// change to owner root
-		if err := os.Chdir(fmt.Sprintf(owner.path)); err != nil {
-			log.Fatal(err)
-		}
-
 		dotGitPath := filepath.Join(repo.path, ".git")
 		if _, err := os.Stat(dotGitPath); err == nil {
-			report = getLeaks(repo, opts)
+			if err := os.Chdir(fmt.Sprintf(repo.path)); err != nil {
+				log.Fatal(err)
+			}
+			// use pre-cloned repo
+			fmt.Printf("Checking \x1b[37;1m%s\x1b[0m...\n", repo.url)
+			err := exec.Command("git", "fetch").Run()
+			if err != nil {
+				log.Printf("failed to fetch repo %v", err)
+				return
+			}
+			report = getLeaks(repo, owner, opts)
 		} else {
+			// no repo present, clone it
+			if err := os.Chdir(fmt.Sprintf(owner.path)); err != nil {
+				log.Fatal(err)
+			}
 			fmt.Printf("Cloning \x1b[37;1m%s\x1b[0m...\n", repo.url)
 			err := exec.Command("git", "clone", repo.url).Run()
 			if err != nil {
 				log.Printf("failed to clone repo %v", err)
 				return
 			}
-			report = getLeaks(repo, opts)
+			report = getLeaks(repo, owner, opts)
 		}
 
 		if len(report) == 0 {
 			fmt.Printf("No Leaks detected for \x1b[35;2m%s\x1b[0m...\n\n", repo.url)
 		}
-		fmt.Println(opts.EnableJSON)
-		// write report
+
 		if opts.EnableJSON {
-			writeGitLeaksReport(report, repo, opts)
+			outputGitLeaksReport(report, repo, opts)
 		}
-
 	}
 }
 
-func writeGitLeaksReport(report []LeakElem, repo RepoDesc, opts *Options) {
-	fmt.Println("writing report")
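+// outputGitLeaksReport marshals the report as tab-indented JSON and writes it
+// to a file, creating repo.owner.reportPath first if it does not exist.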
+func outputGitLeaksReport(report []LeakElem, repo RepoDesc, opts *Options) {
 	reportJSON, _ := json.MarshalIndent(report, "", "\t")
 	if _, err := os.Stat(repo.owner.reportPath); os.IsNotExist(err) {
 		os.Mkdir(repo.owner.reportPath, os.ModePerm)
@@ -84,12 +115,11 @@ func writeGitLeaksReport(report []LeakElem, repo RepoDesc, opts *Options) {
 	err := ioutil.WriteFile(reportFile, reportJSON, 0644)
 	if err != nil {
 		log.Fatalf("Can't write to file: %s", err)
-
 	}
 }
 
 // getLeaks will attempt to find gitleaks
-func getLeaks(repo RepoDesc, opts *Options) []LeakElem {
+func getLeaks(repo RepoDesc, owner *Owner, opts *Options) []LeakElem {
 	var (
 		out               []byte
 		err               error
@@ -99,6 +129,9 @@ func getLeaks(repo RepoDesc, opts *Options) []LeakElem {
 		report            []LeakElem
 	)
 	semaphoreChan := make(chan struct{}, opts.Concurrency)
+	if opts.Tmp{
+		defer rmTmp(owner)
+	}
 
 	go func(commitWG *sync.WaitGroup, gitLeakReceiverWG *sync.WaitGroup) {
 		for gitLeak := range gitLeaks {
@@ -116,41 +149,47 @@ func getLeaks(repo RepoDesc, opts *Options) []LeakElem {
 		log.Fatal(err)
 	}
 
-	out, err = exec.Command("git", "rev-list", "--all", "--remotes", "--topo-order").Output()
+	gitFormat := "--format=%H%n%an%n%s%n%ci"
+	out, err = exec.Command("git", "rev-list", "--all",
+		"--remotes", "--topo-order", gitFormat).Output()
 	if err != nil {
 		log.Fatalf("error retrieving commits%v\n", err)
 	}
 
-	commits := bytes.Split(out, []byte("\n"))
-	for _, currCommitB := range commits {
-		currCommit := string(currCommitB)
-		if currCommit == "" {
+	revListLines := bytes.Split(out, []byte("\n"))
+	commits := parseFormattedRevList(revListLines)
+
+	for _, commit := range commits {
+		if commit.Hash == "" {
 			continue
 		}
-		if currCommit == opts.SinceCommit {
+		if commit.Hash == opts.SinceCommit {
 			break
 		}
 
 		commitWG.Add(1)
-		go func(currCommit string, repoName string, commitWG *sync.WaitGroup,
+		go func(currCommit Commit, repoName string, commitWG *sync.WaitGroup,
 			gitLeakReceiverWG *sync.WaitGroup, opts *Options) {
-
 			defer commitWG.Done()
 			if err := os.Chdir(fmt.Sprintf(repo.path)); err != nil {
 				log.Fatal(err)
 			}
 
-			commitCmp := fmt.Sprintf("%s^!", currCommit)
+			commitCmp := fmt.Sprintf("%s^!", currCommit.Hash)
 			semaphoreChan <- struct{}{}
 			out, err := exec.Command("git", "diff", commitCmp).Output()
 			<-semaphoreChan
 
 			if err != nil {
-				fmt.Printf("error retrieving diff for commit %s try turning concurrency factor down %v\n", currCommit, err)
-				log.Fatal(err)
+				if strings.Contains(err.Error(), "too many files open"){
+					fmt.Printf("error retrieving diff for commit %s. Try turning concurrency down. %v\n", currCommit, err)
+				}
+				if opts.Tmp {
+					rmTmp(owner)
+				}
 			}
 
-			leaks := doChecks(string(out), currCommit, opts)
+			leaks := doChecks(string(out), currCommit, opts, repo)
 			if len(leaks) == 0 {
 				return
 			}
@@ -159,10 +198,24 @@ func getLeaks(repo RepoDesc, opts *Options) []LeakElem {
 				gitLeaks <- leak
 			}
 
-		}(currCommit, repo.name, &commitWG, &gitLeakReceiverWG, opts)
+		}(commit, repo.name, &commitWG, &gitLeakReceiverWG, opts)
 	}
 
 	commitWG.Wait()
 	gitLeakReceiverWG.Wait()
 	return report
 }
+
+// parseFormattedRevList converts the newline-split output of
+// `git rev-list --format=%H%n%an%n%s%n%ci` into Commit values.
+//
+// Each commit occupies five consecutive lines: a leading line that is skipped
+// (git's "commit <hash>" header), followed by the hash, author name, subject
+// and date. Only complete five-line records are parsed; leftover lines at the
+// end (such as the empty element produced by the final newline, or a
+// truncated record) are ignored rather than indexed out of range.
+//
+// It returns nil when revList contains no complete record.
+func parseFormattedRevList(revList [][]byte) []Commit {
+	const linesPerCommit = 5
+
+	var commits []Commit
+	for i := 0; i+linesPerCommit <= len(revList); i += linesPerCommit {
+		// rec[0] is the header line; the formatted fields follow it.
+		rec := revList[i : i+linesPerCommit]
+		commits = append(commits, Commit{
+			Hash:   string(rec[1]),
+			Author: string(rec[2]),
+			Msg:    string(rec[3]),
+			Time:   string(rec[4]),
+		})
+	}
+	return commits
+}

+ 6 - 5
main.go

@@ -51,12 +51,12 @@ func init() {
 	regexes = map[string]*regexp.Regexp{
 		"RSA":      regexp.MustCompile("-----BEGIN RSA PRIVATE KEY-----"),
 		"SSH":      regexp.MustCompile("-----BEGIN OPENSSH PRIVATE KEY-----"),
-		"Facebook": regexp.MustCompile("[f|F][a|A][c|C][e|E][b|B][o|O][o|O][k|K].*['|\"][0-9a-f]{32}['|\"]"),
-		"Twitter":  regexp.MustCompile("[t|T][w|W][i|I][t|T][t|T][e|E][r|R].*['|\"][0-9a-zA-Z]{35,44}['|\"]"),
-		"Github":   regexp.MustCompile("[g|G][i|I][t|T][h|H][u|U][b|B].*[['|\"]0-9a-zA-Z]{35,40}['|\"]"),
+		"Facebook": regexp.MustCompile("(?i)facebook.*['|\"][0-9a-f]{32}['|\"]"),
+		"Twitter":  regexp.MustCompile("(?i)twitter.*['|\"][0-9a-zA-Z]{35,44}['|\"]"),
+		"Github":   regexp.MustCompile("(?i)github.*[['|\"]0-9a-zA-Z]{35,40}['|\"]"),
+		"Reddit":   regexp.MustCompile("(?i)reddit.*['|\"][0-9a-zA-Z]{14}['|\"]"),
+		"Heroku": regexp.MustCompile("(?i)heroku.*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}"),
 		"AWS":      regexp.MustCompile("AKIA[0-9A-Z]{16}"),
-		"Reddit":   regexp.MustCompile("[r|R][e|E][d|D][d|D][i|I][t|T].*['|\"][0-9a-zA-Z]{14}['|\"]"),
-		"Heroku":   regexp.MustCompile("[h|H][e|E][r|R][o|O][k|K][u|U].*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}"),
 		// "Custom": regexp.MustCompile(".*")
 	}
 	assignRegex = regexp.MustCompile(`(=|:|:=|<-)`)
@@ -79,6 +79,7 @@ func init() {
 	}
 }
 
+// getOwner
 func getOwner(opts *Options) *Owner {
 	var owner Owner
 	if opts.RepoURL != "" {