Jelajahi Sumber

refactoring after actually learning some go patterns

zricethezav 8 tahun lalu
induk
melakukan
faca0e14d9
6 mengubah file dengan 548 tambahan dan 349 penghapusan
  1. 5 5
      checks.go
  2. 2 2
      checks_test.go
  3. 29 180
      leaks.go
  4. 37 162
      main.go
  5. 251 0
      owner.go
  6. 224 0
      repo.go

+ 5 - 5
checks.go

@@ -9,11 +9,11 @@ import (
 // TODO LOCAL REPO!!!!
 
 // checks Regex and if enabled, entropy and stopwords
-func doChecks(diff string, commit Commit, opts *Options, repo RepoDesc) []LeakElem {
+func doChecks(diff string, commit Commit, opts *Options, repo *Repo) []Leak {
 	var (
 		match string
-		leaks []LeakElem
-		leak  LeakElem
+		leaks []Leak
+		leak  Leak
 	)
 
 	lines := strings.Split(diff, "\n")
@@ -34,7 +34,7 @@ func doChecks(diff string, commit Commit, opts *Options, repo RepoDesc) []LeakEl
 				continue
 			}
 
-			leak = LeakElem{
+			leak = Leak{
 				Line:     line,
 				Commit:   commit.Hash,
 				Offender: match,
@@ -109,7 +109,7 @@ func checkShannonEntropy(target string, opts *Options) bool {
 func containsStopWords(target string) bool {
 	// Convert to lowercase to reduce the number of loops needed.
 	target = strings.ToLower(target)
-	
+
 	for _, stopWord := range stopWords {
 		if strings.Contains(target, stopWord) {
 			return true

+ 2 - 2
checks_test.go

@@ -5,14 +5,14 @@ import (
 )
 
 func TestCheckRegex(t *testing.T) {
-	var results []LeakElem
+	var results []Leak
 	opts := &Options{
 		Concurrency:      10,
 		B64EntropyCutoff: 70,
 		HexEntropyCutoff: 40,
 		Entropy:          false,
 	}
-	repo := RepoDesc{
+	repo := Repo{
 		url: "someurl",
 	}
 	commit := Commit{}

+ 29 - 180
leaks.go

@@ -1,66 +1,25 @@
 package main
 
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"io/ioutil"
-	"log"
-	"os"
-	"os/exec"
-	"os/signal"
-	"path/filepath"
-	"strings"
-	"sync"
-	"syscall"
-)
+// TODO https://medium.com/@sebdah/go-best-practices-error-handling-2d15e1f0c5ee
+// implement better error handling
 
-// LeakElem contains the line and commit of a leak
-type LeakElem struct {
-	Line     string `json:"line"`
-	Commit   string `json:"commit"`
-	Offender string `json:"string"`
-	Reason   string `json:"reason"`
-	Msg      string `json:"commitMsg"`
-	Time     string `json:"time"`
-	Author   string `json:"author"`
-	File     string `json:"file"`
-	RepoURL  string `json:"repoURL"`
-}
-
-type Commit struct {
-	Hash   string
-	Author string
-	Time   string
-	Msg    string
-}
-
-func rmTmp(owner *Owner) {
-	if _, err := os.Stat(owner.path); err == nil {
-		err := os.RemoveAll(owner.path)
-		log.Printf("\nCleaning up tmp repos in %s\n", owner.path)
-		if err != nil {
-			log.Printf("failed to properly remove tmp gitleaks dir: %v", err)
-		}
-	}
-	os.Exit(1)
-}
+// Commit is so and so
 
-// start
-func start(repos []RepoDesc, owner *Owner, opts *Options) {
-	var report []LeakElem
-	if opts.Tmp {
-		defer rmTmp(owner)
-	}
+/*
+// start kicks off the audit
+func start(repos []Repo, owner *Owner, opts *Options) error {
+	var (
+		report []Leak
+		err error
+	)
+	defer rmTmpDirs(owner, opts)
 
 	// interrupt handling
 	c := make(chan os.Signal, 2)
 	signal.Notify(c, os.Interrupt, syscall.SIGTERM)
 	go func() {
 		<-c
-		if opts.Tmp {
-			rmTmp(owner)
-		}
+		rmTmpDirs(owner, opts)
 		os.Exit(1)
 	}()
 
@@ -73,24 +32,23 @@ func start(repos []RepoDesc, owner *Owner, opts *Options) {
 			}
 			// use pre-cloned repo
 			fmt.Printf("Checking \x1b[37;1m%s\x1b[0m...\n", repo.url)
-			err := exec.Command("git", "fetch").Run()
-			if err != nil {
-				log.Printf("failed to fetch repo %v", err)
-				return
-			}
-			report = getLeaks(repo, owner, opts)
+			err = exec.Command("git", "fetch").Run()
 		} else {
 			// no repo present, clone it
 			if err := os.Chdir(fmt.Sprintf(owner.path)); err != nil {
 				log.Fatal(err)
 			}
 			fmt.Printf("Cloning \x1b[37;1m%s\x1b[0m...\n", repo.url)
-			err := exec.Command("git", "clone", repo.url).Run()
-			if err != nil {
-				fmt.Printf("failed to clone repo %v", err)
-				return
-			}
-			report = getLeaks(repo, owner, opts)
+			err = exec.Command("git", "clone", repo.url).Run()
+		}
+		if err != nil {
+			log.Printf("failed to fetch repo %v", err)
+			return nil
+		}
+
+		report, err = audit(&repo, opts)
+		if err != nil {
+			return nil
 		}
 
 		if len(report) == 0 {
@@ -98,123 +56,14 @@ func start(repos []RepoDesc, owner *Owner, opts *Options) {
 		}
 
 		if opts.EnableJSON && len(report) != 0 {
-			outputGitLeaksReport(report, repo, opts)
+			writeReport(report, repo)
 		}
 	}
+	return nil
 }
+*/
 
-// outputGitLeaksReport
-func outputGitLeaksReport(report []LeakElem, repo RepoDesc, opts *Options) {
-	reportJSON, _ := json.MarshalIndent(report, "", "\t")
-	if _, err := os.Stat(repo.owner.reportPath); os.IsNotExist(err) {
-		os.Mkdir(repo.owner.reportPath, os.ModePerm)
-	}
-
-	reportFileName := fmt.Sprintf("%s_leaks.json", repo.name)
-	reportFile := filepath.Join(repo.owner.reportPath, reportFileName)
-	err := ioutil.WriteFile(reportFile, reportJSON, 0644)
-	if err != nil {
-		log.Fatalf("Can't write to file: %s", err)
-	}
-	fmt.Printf("Report written to %s\n", reportFile)
-}
-
-// getLeaks will attempt to find gitleaks
-func getLeaks(repo RepoDesc, owner *Owner, opts *Options) []LeakElem {
-	var (
-		out               []byte
-		err               error
-		commitWG          sync.WaitGroup
-		gitLeakReceiverWG sync.WaitGroup
-		gitLeaks          = make(chan LeakElem)
-		report            []LeakElem
-	)
-	semaphoreChan := make(chan struct{}, opts.Concurrency)
+// Used by start, writeReport will generate a report and write it out to
+// $GITLEAKS_HOME/report/<owner>/<repo>. No report will be generated if
+// no leaks have been found
 
-	go func(commitWG *sync.WaitGroup, gitLeakReceiverWG *sync.WaitGroup) {
-		for gitLeak := range gitLeaks {
-			b, err := json.MarshalIndent(gitLeak, "", "   ")
-			if err != nil {
-				fmt.Println("failed to output leak:", err)
-			}
-			fmt.Println(string(b))
-			report = append(report, gitLeak)
-			gitLeakReceiverWG.Done()
-		}
-	}(&commitWG, &gitLeakReceiverWG)
-
-	if err := os.Chdir(fmt.Sprintf(repo.path)); err != nil {
-		log.Fatal(err)
-	}
-
-	gitFormat := "--format=%H%n%an%n%s%n%ci"
-	out, err = exec.Command("git", "rev-list", "--all",
-		"--remotes", "--topo-order", gitFormat).Output()
-	if err != nil {
-		log.Fatalf("error retrieving commits%v\n", err)
-	}
-
-	revListLines := bytes.Split(out, []byte("\n"))
-	commits := parseFormattedRevList(revListLines)
-
-	for _, commit := range commits {
-		if commit.Hash == "" {
-			continue
-		}
-
-		commitWG.Add(1)
-		go func(currCommit Commit, repoName string, commitWG *sync.WaitGroup,
-			gitLeakReceiverWG *sync.WaitGroup, opts *Options) {
-			defer commitWG.Done()
-			if err := os.Chdir(fmt.Sprintf(repo.path)); err != nil {
-				log.Fatal(err)
-			}
-
-			commitCmp := fmt.Sprintf("%s^!", currCommit.Hash)
-			semaphoreChan <- struct{}{}
-			out, err := exec.Command("git", "diff", commitCmp).Output()
-			<-semaphoreChan
-
-			if err != nil {
-				if strings.Contains(err.Error(), "too many files open") {
-					log.Printf("error retrieving diff for commit %s. Try turning concurrency down. %v\n", currCommit, err)
-				}
-				if opts.Tmp {
-					rmTmp(owner)
-				}
-			}
-
-			leaks := doChecks(string(out), currCommit, opts, repo)
-			if len(leaks) == 0 {
-				return
-			}
-			for _, leak := range leaks {
-				gitLeakReceiverWG.Add(1)
-				gitLeaks <- leak
-			}
-
-		}(commit, repo.name, &commitWG, &gitLeakReceiverWG, opts)
-
-		if commit.Hash == opts.SinceCommit {
-			break
-		}
-	}
-
-	commitWG.Wait()
-	gitLeakReceiverWG.Wait()
-	return report
-}
-
-func parseFormattedRevList(revList [][]byte) []Commit {
-	var commits []Commit
-	for i := 0; i < len(revList)-1; i = i + 5 {
-		commit := Commit{
-			Hash:   string(revList[i+1]),
-			Author: string(revList[i+2]),
-			Msg:    string(revList[i+3]),
-			Time:   string(revList[i+4]),
-		}
-		commits = append(commits, commit)
-	}
-	return commits
-}

+ 37 - 162
main.go

@@ -1,20 +1,22 @@
 package main
 
 import (
-	"context"
-	"github.com/google/go-github/github"
+	_ "fmt"
 	"github.com/mitchellh/go-homedir"
-	"golang.org/x/oauth2"
-	"io/ioutil"
 	"log"
-	"net/http"
+	_"io/ioutil"
 	"os"
-	"path"
 	"path/filepath"
 	"regexp"
-	"strings"
+	"go.uber.org/zap"
+	_"time"
+	"go.uber.org/zap/zapcore"
 )
 
+const EXIT_CLEAN = 0
+const EXIT_FAILURE = 1
+const EXIT_LEAKS = 2
+
 var (
 	regexes            map[string]*regexp.Regexp
 	stopWords          []string
@@ -25,29 +27,13 @@ var (
 	gitLeaksPath       string
 	gitLeaksClonePath  string
 	gitLeaksReportPath string
+	logger  *zap.Logger
 )
 
-type RepoDesc struct {
-	name  string
-	url   string
-	path  string
-	owner *Owner
-}
-
-type Owner struct {
-	name        string
-	url         string
-	accountType string
-	path        string
-	reportPath  string
-}
-
 func init() {
 	base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
 	hexChars = "1234567890abcdefABCDEF"
-
 	stopWords = []string{"setting", "info", "env", "environment"}
-
 	regexes = map[string]*regexp.Regexp{
 		"PKCS8":    regexp.MustCompile("-----BEGIN PRIVATE KEY-----"),
 		"RSA":      regexp.MustCompile("-----BEGIN RSA PRIVATE KEY-----"),
@@ -60,14 +46,19 @@ func init() {
 		"Heroku":   regexp.MustCompile("(?i)heroku.*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}"),
 		// "Custom": regexp.MustCompile(".*")
 	}
-	assignRegex = regexp.MustCompile(`(=|:|:=|<-)`)
 	fileDiffRegex = regexp.MustCompile("diff --git a.+b/")
-	homeDir, err := homedir.Dir()
-	if err != nil {
-		log.Fatal("Cant find home dir")
+	assignRegex = regexp.MustCompile(`(=|:|:=|<-)`)
+
+	// gitleaks dir defaults to $HOME/.gitleaks if no env var GITLEAKS_HOME is present.
+	gitLeaksPath = os.Getenv("GITLEAKS_HOME")
+	if gitLeaksPath == "" {
+		homeDir, err := homedir.Dir()
+		if err != nil {
+			log.Fatal("Cant find home dir")
+		}
+		gitLeaksPath = filepath.Join(homeDir, ".gitleaks")
 	}
 
-	gitLeaksPath = filepath.Join(homeDir, ".gitleaks")
 	if _, err := os.Stat(gitLeaksPath); os.IsNotExist(err) {
 		os.Mkdir(gitLeaksPath, os.ModePerm)
 	}
@@ -81,141 +72,25 @@ func init() {
 	}
 }
 
-// getOwner
-func getOwner(opts *Options) *Owner {
-	var owner Owner
-	if opts.RepoURL != "" {
-		splitSlashes := strings.Split(opts.RepoURL, "/")
-		owner = Owner{
-			name:        splitSlashes[len(splitSlashes)-2],
-			url:         opts.RepoURL,
-			accountType: "users",
-		}
-
-	} else if opts.UserURL != "" {
-		_, ownerName := path.Split(opts.UserURL)
-		owner = Owner{
-			name:        ownerName,
-			url:         opts.UserURL,
-			accountType: "user",
-		}
-	} else if opts.OrgURL != "" {
-		_, ownerName := path.Split(opts.OrgURL)
-		owner = Owner{
-			name:        ownerName,
-			url:         opts.OrgURL,
-			accountType: "org",
-		}
-	}
-
-	if opts.Tmp {
-		dir, err := ioutil.TempDir("", owner.name)
-		if err != nil {
-			log.Fatal("Cant make temp dir")
-		}
-		owner.path = dir
-	} else {
-		owner.path = filepath.Join(gitLeaksClonePath, owner.name)
-		if _, err := os.Stat(owner.path); os.IsNotExist(err) {
-			os.Mkdir(owner.path, os.ModePerm)
-		}
-	}
-	owner.reportPath = filepath.Join(gitLeaksPath, "report", owner.name)
-	return &owner
-}
-
-// getRepos
-func getRepos(opts *Options, owner *Owner) []RepoDesc {
-	var (
-		allRepos  []*github.Repository
-		repos     []*github.Repository
-		repoDescs []RepoDesc
-		resp      *github.Response
-		ctx       = context.Background()
-		err       error
-	)
-	if opts.RepoURL != "" {
-		_, repoName := path.Split(opts.RepoURL)
-		if strings.HasSuffix(repoName, ".git") {
-			repoName = repoName[:len(repoName)-4]
-		}
-		ownerPath := filepath.Join(owner.path, repoName)
-		repo := RepoDesc{
-			name:  repoName,
-			url:   opts.RepoURL,
-			owner: owner,
-			path:  ownerPath}
-		repoDescs = append(repoDescs, repo)
-		return repoDescs
-	}
-
-	tokenClient := getAccessToken(opts)
-	gitClient := github.NewClient(tokenClient)
-
-	// TODO include fork check
-	orgOpt := &github.RepositoryListByOrgOptions{
-		ListOptions: github.ListOptions{PerPage: 10},
-	}
-	userOpt := &github.RepositoryListOptions{
-		ListOptions: github.ListOptions{PerPage: 10},
-	}
-
-	for {
-		if opts.UserURL != "" {
-			repos, resp, err = gitClient.Repositories.List(
-				ctx, owner.name, userOpt)
-		} else if opts.OrgURL != "" {
-			repos, resp, err = gitClient.Repositories.ListByOrg(
-				ctx, owner.name, orgOpt)
-		}
-		allRepos = append(allRepos, repos...)
-		if resp.NextPage == 0 || err != nil {
-			break
-		}
-
-		for _, repo := range repos {
-			repoPath := filepath.Join(owner.path, *repo.Name)
-			repoDescs = append(repoDescs,
-				RepoDesc{
-					name:  *repo.Name,
-					url:   *repo.CloneURL,
-					owner: owner,
-					path:  repoPath})
-		}
-
-		orgOpt.Page = resp.NextPage
-		userOpt.Page = resp.NextPage
-	}
-
-	return repoDescs
-}
-
-// getAccessToken checks
-// 1. option
-// 2. env var
-// TODO. $HOME/.gitleaks/.creds
-func getAccessToken(opts *Options) *http.Client {
-	var token string
-	if opts.Token != "" {
-		token = opts.Token
-	} else {
-		token = os.Getenv("GITHUB_TOKEN")
-	}
-	if token == "" {
-		return nil
-	}
+func main() {
+	// TODO abstract logging
+	atom := zap.NewAtomicLevel()
+	encoderCfg := zap.NewProductionEncoderConfig()
+	encoderCfg.TimeKey = ""
+	logger = zap.New(zapcore.NewCore(
+		zapcore.NewJSONEncoder(encoderCfg),
+		zapcore.Lock(os.Stdout),
+		atom,
+	))
+	logger.Info("HEY")
+	atom.SetLevel(zap.InfoLevel)
+	logger.Info("HEY")
 
-	tokenService := oauth2.StaticTokenSource(
-		&oauth2.Token{AccessToken: token},
-	)
-	tokenClient := oauth2.NewClient(context.Background(), tokenService)
-	return tokenClient
-}
 
-func main() {
 	args := os.Args[1:]
 	opts := parseOptions(args)
-	owner := getOwner(opts)
-	repos := getRepos(opts, owner)
-	start(repos, owner, opts)
+	owner := newOwner(opts)
+	owner.auditRepos(opts)
+	// repos := getRepos(opts, owner)
+	// start(repos, owner, opts)
 }

+ 251 - 0
owner.go

@@ -0,0 +1,251 @@
+package main
+
+import (
+	"path"
+	"io/ioutil"
+	"path/filepath"
+	"os"
+	"github.com/google/go-github/github"
+	"strings"
+	"context"
+	"golang.org/x/oauth2"
+	"net/http"
+	"log"
+	"os/signal"
+	_"fmt"
+)
+
+// Owner is the account whose repos are audited. For a single-repo run the
+// account type is left empty (see ownerType).
+type Owner struct {
+	name        string // account name as returned by ownerName
+	url         string // url recorded by newOwner
+	accountType string // "org", "user", or "" (see ownerType)
+	path        string // directory repos are cloned into (see setupDir)
+	reportPath  string // directory reports are written to (see setupDir)
+	repos       []Repo // filled in by fetchRepos / addRepos
+}
+
+// newOwner instantiates an owner and creates any necessary resources for said owner:
+// it resolves the owner name and account type from opts, installs an interrupt
+// handler, sets up the owner's directories and collects the repos to audit.
+// newOwner returns an Owner struct pointer.
+func newOwner(opts *Options) *Owner {
+	name, err := ownerName(opts)
+
+	// Record the url the owner was derived from, using the same precedence as
+	// ownerName (repo, then user, then org) instead of always using UserURL.
+	var ownerURL string
+	switch {
+	case opts.RepoURL != "":
+		ownerURL = opts.RepoURL
+	case opts.UserURL != "":
+		ownerURL = opts.UserURL
+	default:
+		ownerURL = opts.OrgURL
+	}
+
+	owner := &Owner{
+		name:        name,
+		url:         ownerURL,
+		accountType: ownerType(opts),
+	}
+	if err != nil {
+		owner.failf()
+	}
+
+	// listen for ctrl-c
+	// NOTE: need some help on how to actually shut down gracefully.
+	// On interrupt a repo may still be trying to clone.
+	sigC := make(chan os.Signal, 1)
+	signal.Notify(sigC, os.Interrupt)
+	go func() {
+		<-sigC
+		// Only a temporary clone directory is disposable; the persistent
+		// clone directory must survive an interrupt.
+		if opts.Tmp {
+			owner.rmTmp()
+		}
+		os.Exit(EXIT_FAILURE)
+	}()
+
+	owner.setupDir(opts)
+	owner.fetchRepos(opts)
+	return owner
+}
+
+// fetchRepos is used by newOwner and is responsible for collecting one or more
+// of the owner's repos. When the owner has no account type (single repo run),
+// only the repo named by opts.RepoURL is added. Otherwise fetchRepos reaches out to
+// github's api and adds all repos listed for the org or user.
+func (owner *Owner) fetchRepos(opts *Options) {
+	if owner.accountType == "" {
+		// single repo, ambiguous account type
+		_, repoName := path.Split(opts.RepoURL)
+		// Clone urls commonly end in ".git"; keep that out of the repo name so
+		// the clone directory and report file are named after the repo itself.
+		repoName = strings.TrimSuffix(repoName, ".git")
+		owner.repos = append(owner.repos, *newRepo(owner, repoName, opts.RepoURL))
+		return
+	}
+
+	// org or user account type
+	ctx := context.Background()
+	gitClient := github.NewClient(githubTokenClient(opts))
+	listOpts := github.ListOptions{PerPage: 10}
+
+	if owner.accountType == "org" {
+		orgOpt := &github.RepositoryListByOrgOptions{ListOptions: listOpts}
+		owner.fetchOrgRepos(orgOpt, gitClient, ctx)
+		return
+	}
+	userOpt := &github.RepositoryListOptions{ListOptions: listOpts}
+	owner.fetchUserRepos(userOpt, gitClient, ctx)
+}
+
+// fetchOrgRepos used by fetchRepos pages through github's org repo listing and
+// adds every returned repo to the owner. Paging stops on the last page or on the
+// first error; errors (including github rate limit errors, which are likely when
+// no token is configured) are logged and the repos collected so far are kept.
+func (owner *Owner) fetchOrgRepos(orgOpts *github.RepositoryListByOrgOptions, gitClient *github.Client,
+	ctx context.Context) {
+	for {
+		githubRepos, resp, err := gitClient.Repositories.ListByOrg(ctx, owner.name, orgOpts)
+		if _, ok := err.(*github.RateLimitError); ok {
+			log.Printf("hit rate limit listing repos for org %s: %v", owner.name, err)
+			return
+		}
+		if err != nil {
+			log.Printf("listing repos for org %s: %v", owner.name, err)
+			return
+		}
+
+		owner.addRepos(githubRepos)
+		if resp.NextPage == 0 {
+			return
+		}
+		orgOpts.Page = resp.NextPage
+	}
+}
+
+// fetchUserRepos used by fetchRepos pages through github's user repo listing and
+// adds every returned repo to the owner. Paging stops on the last page or on the
+// first error; errors (including github rate limit errors, which are likely when
+// no token is configured) are logged and the repos collected so far are kept.
+func (owner *Owner) fetchUserRepos(userOpts *github.RepositoryListOptions, gitClient *github.Client,
+	ctx context.Context) {
+	for {
+		githubRepos, resp, err := gitClient.Repositories.List(ctx, owner.name, userOpts)
+		if _, ok := err.(*github.RateLimitError); ok {
+			log.Printf("hit rate limit listing repos for user %s: %v", owner.name, err)
+			return
+		}
+		if err != nil {
+			log.Printf("listing repos for user %s: %v", owner.name, err)
+			return
+		}
+
+		owner.addRepos(githubRepos)
+		if resp.NextPage == 0 {
+			return
+		}
+		userOpts.Page = resp.NextPage
+	}
+}
+
+// addRepos used by fetchUserRepos and fetchOrgRepos appends new repos from
+// github's org/user response. Entries without a name or clone url are skipped
+// because both are pointer fields and would otherwise be dereferenced as nil.
+func (owner *Owner) addRepos(githubRepos []*github.Repository) {
+	for _, repo := range githubRepos {
+		if repo == nil || repo.Name == nil || repo.CloneURL == nil {
+			continue
+		}
+		owner.repos = append(owner.repos, *newRepo(owner, *repo.Name, *repo.CloneURL))
+	}
+}
+
+// auditRepos audits every repo collected for the owner, in order. A failed
+// audit is reported through owner.failf and the remaining repos are still audited.
+func (owner *Owner) auditRepos(opts *Options) {
+	// Index into the slice rather than ranging by value: audit stores its
+	// results on the repo, and a per-iteration copy would discard them.
+	for i := range owner.repos {
+		if err := owner.repos[i].audit(owner, opts); err != nil {
+			owner.failf()
+		}
+	}
+}
+
+// failf is the owner's failure hook. It is not implemented yet and does nothing.
+func (owner *Owner) failf() {
+	// TODO: implement; until then callers carry on after invoking it.
+}
+
+// exitNow is an empty placeholder; it currently performs no work.
+func (owner *Owner) exitNow() {
+	// TODO: not implemented.
+}
+
+// setupDir sets up the owner's directory for clones and reports.
+// If the temporary option is set then a temporary directory will be
+// used for the owner repo clones; otherwise the clone directory lives under
+// gitLeaksClonePath and is created (with any missing parents) if absent.
+// The report path is always gitLeaksPath/report/<owner name>.
+func (owner *Owner) setupDir(opts *Options) {
+	if opts.Tmp {
+		dir, err := ioutil.TempDir("", owner.name)
+		if err != nil {
+			owner.failf()
+		}
+		owner.path = dir
+	} else {
+		owner.path = filepath.Join(gitLeaksClonePath, owner.name)
+		// MkdirAll is a no-op when the directory already exists and also
+		// creates missing parents, which a plain Mkdir would fail on.
+		if err := os.MkdirAll(owner.path, os.ModePerm); err != nil {
+			log.Printf("creating clone dir %s: %v", owner.path, err)
+		}
+	}
+	owner.reportPath = filepath.Join(gitLeaksPath, "report", owner.name)
+}
+
+// rmTmp deletes owner.path and everything beneath it, then terminates the
+// process with EXIT_FAILURE. The outcome of the removal is not checked.
+func (owner *Owner) rmTmp() {
+	_ = os.RemoveAll(owner.path)
+	os.Exit(EXIT_FAILURE)
+}
+
+// ownerType derives the account type from opts: "org" when an org url is set,
+// otherwise "user" when a user url is set. The empty string means neither was
+// given, in which case gitleaks treats the run as a single-repo audit.
+func ownerType(opts *Options) string {
+	switch {
+	case opts.OrgURL != "":
+		return "org"
+	case opts.UserURL != "":
+		return "user"
+	default:
+		return ""
+	}
+}
+
+// ownerNameError is the error type returned by ownerName.
+type ownerNameError string
+
+// Error implements the error interface.
+func (e ownerNameError) Error() string { return string(e) }
+
+// ownerName returns the owner name extracted from the urls provided in opts.
+// The repo url takes precedence (second to last "/"-separated segment), then the
+// user url, then the org url (last path element). A non-nil error is returned when
+// no url is provided or when the repo url has too few segments to contain an owner.
+func ownerName(opts *Options) (string, error) {
+	switch {
+	case opts.RepoURL != "":
+		segments := strings.Split(opts.RepoURL, "/")
+		// Guard the index below: a url without a "/" has a single segment.
+		if len(segments) < 2 {
+			return "", ownerNameError("cannot determine owner from repo url: " + opts.RepoURL)
+		}
+		return segments[len(segments)-2], nil
+	case opts.UserURL != "":
+		_, name := path.Split(opts.UserURL)
+		return name, nil
+	case opts.OrgURL != "":
+		_, name := path.Split(opts.OrgURL)
+		return name, nil
+	}
+	return "", ownerNameError("no repo, user, or org url provided")
+}
+
+// githubTokenClient builds an oauth2-backed http client from a github access
+// token. The token is taken from opts.Token when set, otherwise from the
+// GITHUB_TOKEN env var. When neither yields a token, nil is returned.
+// Might be good to eventually parse the token from a config or creds file in
+// $GITLEAKS_HOME
+func githubTokenClient(opts *Options) *http.Client {
+	token := opts.Token
+	if token == "" {
+		token = os.Getenv("GITHUB_TOKEN")
+	}
+	if token == "" {
+		return nil
+	}
+
+	source := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token})
+	return oauth2.NewClient(context.Background(), source)
+}

+ 224 - 0
repo.go

@@ -0,0 +1,224 @@
+package main
+
+import (
+	"path/filepath"
+	"os"
+	"fmt"
+	"os/exec"
+	"sync"
+	"bytes"
+	"encoding/json"
+	"log"
+	"strings"
+	"io/ioutil"
+)
+
+// Repo is a single git repository to audit.
+type Repo struct {
+	name   string // repo name; also used to name the report file
+	url    string // url handed to git clone
+	path   string // local clone location
+	status string // TODO
+	leaks  []Leak // leaks found by audit
+	owner  *Owner // the Owner this repo is associated with
+}
+
+type Leak struct { // Leak is one finding; it is serialized to JSON for output and reports.
+	Line     string `json:"line"`   // the diff line that was flagged
+	Commit   string `json:"commit"` // hash of the commit the line came from
+	Offender string `json:"string"` // the matched text; note the JSON key is "string"
+	Reason   string `json:"reason"`
+	Msg      string `json:"commitMsg"`
+	Time     string `json:"time"`
+	Author   string `json:"author"`
+	File     string `json:"file"`
+	RepoURL  string `json:"repoURL"`
+}
+
+type Commit struct { // Commit is one entry parsed from the rev-list output (see parseRevList).
+	Hash   string // %H line of the rev-list format
+	Author string // %an line of the rev-list format
+	Time   string // %ci line of the rev-list format
+	Msg    string // %s line of the rev-list format
+}
+
+// newRepo builds a Repo named name with clone url url, located at
+// <owner.path>/<name>. The repo keeps a reference to owner so that code reading
+// repo.owner (e.g. when writing reports) does not dereference a nil pointer.
+func newRepo(owner *Owner, name string, url string) *Repo {
+	return &Repo{
+		name:  name,
+		url:   url,
+		path:  filepath.Join(owner.path, name),
+		owner: owner,
+	}
+}
+
+// audit operates on a single repo and searches the full or partial history of the repo.
+// The repo is cloned into repo.path when no .git directory is present there, otherwise
+// it is fetched. Every commit listed by git rev-list is then diffed and checked in its
+// own goroutine (see auditDiff) until opts.SinceCommit is reached.
+//
+// A semaphore is declared for every repo to bind concurrency. If unbounded, the system will throw a
+// `too many open files` error. Eventually, gitleaks should use src-d/go-git to avoid shelling out
+// commands so that users could opt for doing all clones/diffs in memory.
+// audit also declares two WaitGroups, one for distributing regex/entropy checks, and one for receiving
+// the leaks if there are any. This could be done a little more elegantly in the future.
+//
+// A non-nil error is returned when the repo cannot be cloned, fetched, entered, or listed;
+// in that case no commits are audited. When opts.EnableJSON is set and leaks were found,
+// a report is written via writeReport.
+func (repo *Repo) audit(owner *Owner, opts *Options) error {
+	var (
+		commitWG          sync.WaitGroup
+		gitLeakReceiverWG sync.WaitGroup
+		gitLeaksChan      = make(chan Leak)
+		leaks             []Leak
+	)
+
+	// A semaphore with no capacity would block every diff forever, so always
+	// allow at least one diff at a time.
+	concurrency := opts.Concurrency
+	if concurrency < 1 {
+		concurrency = 1
+	}
+	semaphoreChan := make(chan struct{}, concurrency)
+
+	dotGitPath := filepath.Join(repo.path, ".git")
+
+	// Clone repo if not present, otherwise fetch for new changes.
+	if _, statErr := os.Stat(dotGitPath); os.IsNotExist(statErr) {
+		// no repo present, clone it
+		fmt.Printf("Cloning \x1b[37;1m%s\x1b[0m into %s...\n", repo.url, repo.path)
+		if err := exec.Command("git", "clone", repo.url, repo.path).Run(); err != nil {
+			return fmt.Errorf("cloning %s into %s: %v", repo.url, repo.path, err)
+		}
+	} else {
+		fmt.Printf("Checking \x1b[37;1m%s\x1b[0m from %s...\n", repo.url, repo.path)
+		// Run the fetch inside the clone; the process working directory may
+		// still point somewhere else at this stage.
+		fetchCmd := exec.Command("git", "fetch")
+		fetchCmd.Dir = repo.path
+		if err := fetchCmd.Run(); err != nil {
+			return fmt.Errorf("fetching %s in %s: %v", repo.url, repo.path, err)
+		}
+	}
+
+	// Navigate to the repo to begin the audit. Bail out on failure, otherwise
+	// rev-list would run against whatever directory the process is in.
+	if err := os.Chdir(repo.path); err != nil {
+		return fmt.Errorf("entering %s: %v", repo.path, err)
+	}
+
+	gitFormat := "--format=%H%n%an%n%s%n%ci"
+	out, err := exec.Command("git", "rev-list", "--all",
+		"--remotes", "--topo-order", gitFormat).Output()
+	if err != nil {
+		return fmt.Errorf("listing commits of %s: %v", repo.path, err)
+	}
+
+	revListLines := bytes.Split(out, []byte("\n"))
+	commits := parseRevList(revListLines)
+
+	go reportAggregator(&gitLeakReceiverWG, gitLeaksChan, &leaks)
+
+	for _, commit := range commits {
+		if commit.Hash == "" {
+			continue
+		}
+
+		commitWG.Add(1)
+		go auditDiff(commit, repo, &commitWG, &gitLeakReceiverWG, opts,
+			semaphoreChan, gitLeaksChan)
+
+		if commit.Hash == opts.SinceCommit {
+			break
+		}
+	}
+	commitWG.Wait()
+	gitLeakReceiverWG.Wait()
+	// All senders are done and every leak has been recorded; closing the
+	// channel lets the aggregator goroutine finish instead of lingering.
+	close(gitLeaksChan)
+
+	// repo audit has finished
+	repo.leaks = leaks
+
+	if opts.EnableJSON && len(leaks) != 0 {
+		repo.writeReport(owner)
+	}
+
+	return nil
+}
+
+func (repo *Repo) log() {
+	// TODO: not implemented; currently a no-op.
+}
+
+// Used by audit, writeReport will generate a report from repo.leaks and write it
+// out to <owner.reportPath>/<repo name>_leaks.json, creating the report directory
+// if needed. Failures are printed and abort the report; the success message is
+// only printed once the file has actually been written.
+func (repo *Repo) writeReport(owner *Owner) {
+	reportJSON, err := json.MarshalIndent(repo.leaks, "", "\t")
+	if err != nil {
+		fmt.Println("cant marshal report:", err)
+		return
+	}
+
+	// Use the owner argument throughout; repo.owner is not guaranteed to be set.
+	if err := os.MkdirAll(owner.reportPath, os.ModePerm); err != nil {
+		fmt.Println("cant create report dir:", err)
+		return
+	}
+
+	reportFileName := fmt.Sprintf("%s_leaks.json", repo.name)
+	reportFile := filepath.Join(owner.reportPath, reportFileName)
+	if err := ioutil.WriteFile(reportFile, reportJSON, 0644); err != nil {
+		fmt.Println("cant write report:", err)
+		return
+	}
+	fmt.Printf("Report written to %s\n", reportFile)
+}
+
+// parseRevList is responsible for parsing the output of
+// $ `git rev-list --all --remotes --topo-order --format=%H%n%an%n%s%n%ci`
+// split into lines. Entries are read in groups of five lines, of which the
+// first is skipped and the remaining four look like:
+//		...
+// 		SHA
+// 		Author Name
+// 		Commit Msg
+// 		Commit Date
+//		...
+// A trailing group with fewer than five lines is ignored rather than read
+// past the end of the input.
+// Used by audit
+func parseRevList(revList [][]byte) []Commit {
+	var commits []Commit
+	for i := 0; i+4 < len(revList); i += 5 {
+		commits = append(commits, Commit{
+			Hash:   string(revList[i+1]),
+			Author: string(revList[i+2]),
+			Msg:    string(revList[i+3]),
+			Time:   string(revList[i+4]),
+		})
+	}
+	return commits
+}
+
+// reportAggregator is a go func that drains the gitLeaks channel. Each received
+// leak is printed as indented JSON, appended to *leaks, and then acknowledged
+// with gitLeakReceiverWG.Done. It runs until the channel is closed.
+func reportAggregator(gitLeakReceiverWG *sync.WaitGroup, gitLeaks chan Leak, leaks *[]Leak) {
+	for found := range gitLeaks {
+		encoded, marshalErr := json.MarshalIndent(found, "", "   ")
+		if marshalErr != nil {
+			fmt.Println("failed to output leak:", marshalErr)
+		}
+		fmt.Println(string(encoded))
+
+		*leaks = append(*leaks, found)
+		gitLeakReceiverWG.Done()
+	}
+}
+
+// Used by audit, auditDiff is a go func responsible for diffing and auditing a commit.
+// Two channels are input here: 1. a semaphore to bind the number of concurrent git
+// diffs, 2. a leak stream. Error reporting back to the caller is still TODO.
+// This func performs a diff of the commit against its parents (`<hash>^!`), runs
+// doChecks on the diff, and sends every resulting leak on gitLeaks after
+// registering it with gitLeakReceiverWG. commitWG is signalled when it returns.
+func auditDiff(currCommit Commit, repo *Repo, commitWG *sync.WaitGroup,
+	gitLeakReceiverWG *sync.WaitGroup, opts *Options, semaphoreChan chan struct{},
+	gitLeaks chan Leak) {
+	// signal to WG this diff is done being audited
+	defer commitWG.Done()
+
+	// Point the command at the repo instead of changing the process-wide
+	// working directory from many goroutines at once.
+	commitCmp := fmt.Sprintf("%s^!", currCommit.Hash)
+	diffCmd := exec.Command("git", "diff", commitCmp)
+	diffCmd.Dir = repo.path
+
+	semaphoreChan <- struct{}{}
+	out, err := diffCmd.Output()
+	<-semaphoreChan
+
+	if err != nil {
+		// TODO surface this to the caller
+		if strings.Contains(err.Error(), "too many open files") {
+			log.Printf("error retrieving diff for commit %s. Try turning concurrency down. %v\n", currCommit.Hash, err)
+		} else {
+			log.Printf("error retrieving diff for commit %s: %v\n", currCommit.Hash, err)
+		}
+	}
+
+	// Whatever output was captured is still checked, even after an error.
+	for _, leak := range doChecks(string(out), currCommit, opts, repo) {
+		gitLeakReceiverWG.Add(1)
+		gitLeaks <- leak
+	}
+}
+