Просмотр исходного кода

Merge pull request #55 from zricethezav/feature/usability

Feature/usability
Zachary Rice 8 лет назад
Родитель
Коммит
9f3ae4efa0
11 измененных файлов с 1006 добавлено и 550 удалено
  1. 57 66
      README.md
  2. 5 8
      checks.go
  3. 4 4
      checks_test.go
  4. 0 220
      leaks.go
  5. 30 190
      main.go
  6. 189 62
      options.go
  7. 88 0
      options_test.go
  8. 275 0
      owner.go
  9. 35 0
      owner_test.go
  10. 258 0
      repo.go
  11. 65 0
      repo_test.go

+ 57 - 66
README.md

@@ -1,5 +1,5 @@
 ![Alt Text](https://github.com/zricethezav/gifs/blob/master/gitleaks1.png) [![Build Status](https://travis-ci.org/zricethezav/gitleaks.svg?branch=master)](https://travis-ci.org/zricethezav/gitleaks)
-## Check git repos for secrets and keys
+## Audit git repos for secrets and keys
 
 #### Installing
 
@@ -12,84 +12,75 @@ go get -u github.com/zricethezav/gitleaks
 ![Alt Text](https://github.com/zricethezav/gifs/blob/master/gitleaks.gif)
 
 ```bash
-./gitleaks {git url}
+./gitleaks [options] <url/path>
 ```
 
-Gitleaks will clone the target `<git url>` to `$HOME/.gitleaks/clones/<repo name>` and run a regex check against all diffs of all commits on all remotes in topological order. If any leaks are found gitleaks will output the leak in json, Ex:
+Gitleaks audits local and remote repos by running regex checks against all commits.
+
+#### Options
+```
+usage: gitleaks [options] <URL>/<path_to_repo>
+
+Options:
+ -u --user              Git user mode
+ -r --repo              Git repo mode
+ -o --org               Git organization mode
+ -l --local             Local mode, gitleaks will look for local repo in <path>
+ -t --temp              Clone to temporary directory
+ -v --verbose           Verbose mode, will output leaks as gitleaks finds them
+ --report-path=<STR>    Save report to path, gitleaks default behavior is to save report to pwd
+ --clone-path=<STR>     Gitleaks will clone repos here, default pwd
+ --concurrency=<INT>    Upper bound on concurrent diffs
+ --since=<STR>          Commit to stop at
+ --b64Entropy=<INT>     Base64 entropy cutoff (default is 70)
+ --hexEntropy=<INT>     Hex entropy cutoff (default is 40)
+ -e --entropy           Enable entropy
+ -h --help              Display this message
+ --token=<STR>          Github API token
+ --stopwords            Enables stopwords
 ```
-{
-   "line": "-const AWS_KEY = \"AKIALALEMEL33243OLIAE\"",
-   "commit": "eaeffdc65b4c73ccb67e75d96bd8743be2c85973",
-   "string": "AKIALALEMEL33243OLIA",
-   "reason": "AWS",
-   "commitMsg": "remove fake key",
-   "time": "2018-02-04 19:43:28 -0600",
-   "author": "Zachary Rice",
-   "file": "main.go",
-   "repoURL": "https://github.com/zricethezav/gronit"
-}
-``` 
-Gitleaks will not re-clone repos unless the temporary flag is set (see Options section), instead gitleaks will `fetch` all new changes before the scan. This works for users and organization repos as well. Regex's for the scan are defined in `main.go`. Feel free to open a PR and contribute if you have additional regex you want included. Work largely based on  [https://people.eecs.berkeley.edu/~rohanpadhye/files/key_leaks-msr15.pdf](https://people.eecs.berkeley.edu/~rohanpadhye/files/key_leaks-msr15.pdf) and regexes from https://github.com/dxa4481/truffleHog and https://github.com/anshumanbh/git-all-secrets.
-
-#### Example with Report
+
+#### Exit Codes 
+code | explanation
+ -------------|-------------
+0 | Gitleaks succeeded with no leaks
+1 | Gitleaks failed or wasn't attempted due to execution failure
+2 | Gitleaks succeeded and leaks were present during the audit
+
+Use these codes to hook gitleaks into whatever pipeline you're running
+
+#### Examples
 ```bash
-gitleaks --json https://github.com/zricethezav/gronit
+gitleaks
 ```
-This will run gitleaks on one of my projects, gronit and create the following structure in `$HOME/.gitleaks`:
+Run audit on current working directory if `.git` is present 
+
+```bash
+gitleaks --local $HOME/audits/some/repo
 ```
-.
-├── clones
-│   └── zricethezav
-│       └── gronit
-│           ├── README.md
-│           ├── main.go
-│           ├── options.go
-│           ├── server.go
-│           └── utils.go
-└── report
-    └── zricethezav
-        └── gronit_leaks.json
+Run audit on repo located in `$HOME/audits/some/repo` if `.git` is present
+
+```bash
+gitleaks https://github.com/some/repo
 ```
-The clones directory contains the repo owner (me) and any repos gitleaks has scanned. Next time we run gitleaks on gronit again we will `fetch` gronit rather than `clone`. Reports are written out to `$HOME/.gitleaks/report/<owner>/<repo>_leaks.json`
+Run audit on `github.com/some/repo.git` and clone repo to the default clone path (see `--clone-path` in Options)
 
-#### Options
+```bash
+gitleaks --clone-path=$HOME/Desktop/audits https://github.com/some/repo
 ```
-usage: gitleaks [options] <url>
+Run audit on `github.com/some/repo.git` and clone repo to $HOME/Desktop/audits 
 
-Options:
- -c --concurrency 	Upper bound on concurrent diffs
- -u --user 		    Git user url
- -r --repo 		    Git repo url
- -o --org 		    Git organization url
- -s --since 		Commit to stop at
- -b --b64Entropy 	Base64 entropy cutoff (default is 70)
- -x --hexEntropy  	Hex entropy cutoff (default is 40)
- -e --entropy		Enable entropy		
- -j --json 		    Output gitleaks report
- --token    		Github API token
- --strict 		    Enables stopwords
- -h --help 		    Display this message
+```bash
+gitleaks --temp https://github.com/some/repo
+```
+Run audit on `github.com/some/repo.git` and clone repo to $TMPDIR (this will remove repos after audit is complete)
 
+```bash
+gitleaks --temp -u https://github.com/some-user
 ```
+Run audit on all of `some-user`'s repos. Again, the `--temp` flag will clone all repos into $TMPDIR and remove them after the audit
+
 
-##### Options Explained
-
-| Option | Explanation |
-| ------------- | ------------- |
-| -c --concurrency | Set the limit on the number of concurrent diffs. If unbounded, your system would throw a `too many open files` error. Tweak `ulimit` for quicker scans at your own risk. Ex: `gitleaks -c 100 <repo_url>` |
-| -u --user | Target git user. Reports and clones are dumped to `$HOME/.gitleaks/clones/<user>/<user_repos>` and `$HOME/.gitleaks/reports/<user>/<gitleaks_reports>`. Ex: `gitleaks -u <user_git_url>`.
-| -o --org | Target git organization. Reports and clones are dumped to `$HOME/.gitleaks/clones/<org>/<org_repos>` and `$HOME/.gitleaks/reports/<org>/<gitleaks_reports>`. Ex: `gitleaks -o <org_git_url>`
-| -r --repo | Default behavior is to have gitleaks target a specific repo, so this option is unecessary, but... Target git repo. Reports and clones are dumped to `$HOME/.gitleaks/clones/<owner>/<repos>` and `$HOME/.gitleaks/reports/<owner>/<gitleaks_reports>`
-| -s --since  | Since argument accepts a commit hash and will scan the repo history up to and including this hash. Ex: `gitleaks -s <HASH> <repo_url>`
-| -b --b64Entropy | Entropy cutoff for base 64 characters. Ex: `gitleaks -e -b 70 <repo_url>` |
-| -x --hexEntropy | Entropy cutoff for hex characters. Ex: `gitleaks -e -x 70 <repo_url>` |
-| -e --entroy | Enable entropy checks. Ex: `gitleaks -e <repo_url>` |
-| -j --json | Enable report generation. Ex: `gitleaks --json <repo_url>` | 
-| -t --temporary | Cloned repos will be cloned into a temp directory and removed after gitleaks exits. Ex: `gitleaks -t <repo_url>` |
-| --token | NOTE: you should use env var `GITHUB_TOKEN` instead of this flag. Github API token needed for scanning private repos and pagination on repo fetching from github's api. |
-| -- strict | Enable stopwords. Ex: `gitleaks --strict <repo_url>` |
-
-NOTE: your mileage may vary so if you aren't getting the results you expected try updating the regexes to fit your needs or try tweaking the entropy cutoffs and stopwords. Entropy cutoff for base64 alphabets seemed to give good results around 70 and hex alphabets seemed to give good results around 40. Entropy is calculated using [Shannon entropy](http://www.bearcave.com/misl/misl_tech/wavelets/compression/shannon.html).
 
 
 ### If you find a valid leak in a repo

+ 5 - 8
checks.go

@@ -1,19 +1,16 @@
 package main
 
 import (
-	_ "fmt"
 	"math"
 	"strings"
 )
 
-// TODO LOCAL REPO!!!!
-
 // checks Regex and if enabled, entropy and stopwords
-func doChecks(diff string, commit Commit, opts *Options, repo RepoDesc) []LeakElem {
+func doChecks(diff string, commit Commit, repo *Repo) []Leak {
 	var (
 		match string
-		leaks []LeakElem
-		leak  LeakElem
+		leaks []Leak
+		leak  Leak
 	)
 
 	lines := strings.Split(diff, "\n")
@@ -34,7 +31,7 @@ func doChecks(diff string, commit Commit, opts *Options, repo RepoDesc) []LeakEl
 				continue
 			}
 
-			leak = LeakElem{
+			leak = Leak{
 				Line:     line,
 				Commit:   commit.Hash,
 				Offender: match,
@@ -109,7 +106,7 @@ func checkShannonEntropy(target string, opts *Options) bool {
 func containsStopWords(target string) bool {
 	// Convert to lowercase to reduce the number of loops needed.
 	target = strings.ToLower(target)
-	
+
 	for _, stopWord := range stopWords {
 		if strings.Contains(target, stopWord) {
 			return true

+ 4 - 4
checks_test.go

@@ -5,14 +5,14 @@ import (
 )
 
 func TestCheckRegex(t *testing.T) {
-	var results []LeakElem
-	opts := &Options{
+	var results []Leak
+	opts = &Options{
 		Concurrency:      10,
 		B64EntropyCutoff: 70,
 		HexEntropyCutoff: 40,
 		Entropy:          false,
 	}
-	repo := RepoDesc{
+	repo := Repo{
 		url: "someurl",
 	}
 	commit := Commit{}
@@ -22,7 +22,7 @@ func TestCheckRegex(t *testing.T) {
 	}
 
 	for k, v := range checks {
-		results = doChecks(k, commit, opts, repo)
+		results = doChecks(k, commit, &repo)
 		if v != len(results) {
 			t.Errorf("regexCheck failed on string %s", k)
 		}

+ 0 - 220
leaks.go

@@ -1,220 +0,0 @@
-package main
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"io/ioutil"
-	"log"
-	"os"
-	"os/exec"
-	"os/signal"
-	"path/filepath"
-	"strings"
-	"sync"
-	"syscall"
-)
-
-// LeakElem contains the line and commit of a leak
-type LeakElem struct {
-	Line     string `json:"line"`
-	Commit   string `json:"commit"`
-	Offender string `json:"string"`
-	Reason   string `json:"reason"`
-	Msg      string `json:"commitMsg"`
-	Time     string `json:"time"`
-	Author   string `json:"author"`
-	File     string `json:"file"`
-	RepoURL  string `json:"repoURL"`
-}
-
-type Commit struct {
-	Hash   string
-	Author string
-	Time   string
-	Msg    string
-}
-
-func rmTmp(owner *Owner) {
-	if _, err := os.Stat(owner.path); err == nil {
-		err := os.RemoveAll(owner.path)
-		log.Printf("\nCleaning up tmp repos in %s\n", owner.path)
-		if err != nil {
-			log.Printf("failed to properly remove tmp gitleaks dir: %v", err)
-		}
-	}
-	os.Exit(1)
-}
-
-// start
-func start(repos []RepoDesc, owner *Owner, opts *Options) {
-	var report []LeakElem
-	if opts.Tmp {
-		defer rmTmp(owner)
-	}
-
-	// interrupt handling
-	c := make(chan os.Signal, 2)
-	signal.Notify(c, os.Interrupt, syscall.SIGTERM)
-	go func() {
-		<-c
-		if opts.Tmp {
-			rmTmp(owner)
-		}
-		os.Exit(1)
-	}()
-
-	// run checks on repos
-	for _, repo := range repos {
-		dotGitPath := filepath.Join(repo.path, ".git")
-		if _, err := os.Stat(dotGitPath); err == nil {
-			if err := os.Chdir(fmt.Sprintf(repo.path)); err != nil {
-				log.Fatal(err)
-			}
-			// use pre-cloned repo
-			fmt.Printf("Checking \x1b[37;1m%s\x1b[0m...\n", repo.url)
-			err := exec.Command("git", "fetch").Run()
-			if err != nil {
-				log.Printf("failed to fetch repo %v", err)
-				return
-			}
-			report = getLeaks(repo, owner, opts)
-		} else {
-			// no repo present, clone it
-			if err := os.Chdir(fmt.Sprintf(owner.path)); err != nil {
-				log.Fatal(err)
-			}
-			fmt.Printf("Cloning \x1b[37;1m%s\x1b[0m...\n", repo.url)
-			err := exec.Command("git", "clone", repo.url).Run()
-			if err != nil {
-				fmt.Printf("failed to clone repo %v", err)
-				return
-			}
-			report = getLeaks(repo, owner, opts)
-		}
-
-		if len(report) == 0 {
-			fmt.Printf("No Leaks detected for \x1b[35;2m%s\x1b[0m...\n", repo.url)
-		}
-
-		if opts.EnableJSON && len(report) != 0 {
-			outputGitLeaksReport(report, repo, opts)
-		}
-	}
-}
-
-// outputGitLeaksReport
-func outputGitLeaksReport(report []LeakElem, repo RepoDesc, opts *Options) {
-	reportJSON, _ := json.MarshalIndent(report, "", "\t")
-	if _, err := os.Stat(repo.owner.reportPath); os.IsNotExist(err) {
-		os.Mkdir(repo.owner.reportPath, os.ModePerm)
-	}
-
-	reportFileName := fmt.Sprintf("%s_leaks.json", repo.name)
-	reportFile := filepath.Join(repo.owner.reportPath, reportFileName)
-	err := ioutil.WriteFile(reportFile, reportJSON, 0644)
-	if err != nil {
-		log.Fatalf("Can't write to file: %s", err)
-	}
-	fmt.Printf("Report written to %s\n", reportFile)
-}
-
-// getLeaks will attempt to find gitleaks
-func getLeaks(repo RepoDesc, owner *Owner, opts *Options) []LeakElem {
-	var (
-		out               []byte
-		err               error
-		commitWG          sync.WaitGroup
-		gitLeakReceiverWG sync.WaitGroup
-		gitLeaks          = make(chan LeakElem)
-		report            []LeakElem
-	)
-	semaphoreChan := make(chan struct{}, opts.Concurrency)
-
-	go func(commitWG *sync.WaitGroup, gitLeakReceiverWG *sync.WaitGroup) {
-		for gitLeak := range gitLeaks {
-			b, err := json.MarshalIndent(gitLeak, "", "   ")
-			if err != nil {
-				fmt.Println("failed to output leak:", err)
-			}
-			fmt.Println(string(b))
-			report = append(report, gitLeak)
-			gitLeakReceiverWG.Done()
-		}
-	}(&commitWG, &gitLeakReceiverWG)
-
-	if err := os.Chdir(fmt.Sprintf(repo.path)); err != nil {
-		log.Fatal(err)
-	}
-
-	gitFormat := "--format=%H%n%an%n%s%n%ci"
-	out, err = exec.Command("git", "rev-list", "--all",
-		"--remotes", "--topo-order", gitFormat).Output()
-	if err != nil {
-		log.Fatalf("error retrieving commits%v\n", err)
-	}
-
-	revListLines := bytes.Split(out, []byte("\n"))
-	commits := parseFormattedRevList(revListLines)
-
-	for _, commit := range commits {
-		if commit.Hash == "" {
-			continue
-		}
-
-		commitWG.Add(1)
-		go func(currCommit Commit, repoName string, commitWG *sync.WaitGroup,
-			gitLeakReceiverWG *sync.WaitGroup, opts *Options) {
-			defer commitWG.Done()
-			if err := os.Chdir(fmt.Sprintf(repo.path)); err != nil {
-				log.Fatal(err)
-			}
-
-			commitCmp := fmt.Sprintf("%s^!", currCommit.Hash)
-			semaphoreChan <- struct{}{}
-			out, err := exec.Command("git", "diff", commitCmp).Output()
-			<-semaphoreChan
-
-			if err != nil {
-				if strings.Contains(err.Error(), "too many files open") {
-					log.Printf("error retrieving diff for commit %s. Try turning concurrency down. %v\n", currCommit, err)
-				}
-				if opts.Tmp {
-					rmTmp(owner)
-				}
-			}
-
-			leaks := doChecks(string(out), currCommit, opts, repo)
-			if len(leaks) == 0 {
-				return
-			}
-			for _, leak := range leaks {
-				gitLeakReceiverWG.Add(1)
-				gitLeaks <- leak
-			}
-
-		}(commit, repo.name, &commitWG, &gitLeakReceiverWG, opts)
-
-		if commit.Hash == opts.SinceCommit {
-			break
-		}
-	}
-
-	commitWG.Wait()
-	gitLeakReceiverWG.Wait()
-	return report
-}
-
-func parseFormattedRevList(revList [][]byte) []Commit {
-	var commits []Commit
-	for i := 0; i < len(revList)-1; i = i + 5 {
-		commit := Commit{
-			Hash:   string(revList[i+1]),
-			Author: string(revList[i+2]),
-			Msg:    string(revList[i+3]),
-			Time:   string(revList[i+4]),
-		}
-		commits = append(commits, commit)
-	}
-	return commits
-}

+ 30 - 190
main.go

@@ -1,53 +1,40 @@
 package main
 
 import (
-	"context"
-	"github.com/google/go-github/github"
-	"github.com/mitchellh/go-homedir"
-	"golang.org/x/oauth2"
-	"io/ioutil"
-	"log"
-	"net/http"
+	"fmt"
 	"os"
-	"path"
-	"path/filepath"
 	"regexp"
-	"strings"
 )
 
-var (
-	regexes            map[string]*regexp.Regexp
-	stopWords          []string
-	base64Chars        string
-	hexChars           string
-	assignRegex        *regexp.Regexp
-	fileDiffRegex      *regexp.Regexp
-	gitLeaksPath       string
-	gitLeaksClonePath  string
-	gitLeaksReportPath string
-)
+// ExitClean : no leaks have been found
+const ExitClean = 0
 
-type RepoDesc struct {
-	name  string
-	url   string
-	path  string
-	owner *Owner
-}
+// ExitFailure : gitleaks has encountered an error or SIGINT
+const ExitFailure = 1
 
-type Owner struct {
-	name        string
-	url         string
-	accountType string
-	path        string
-	reportPath  string
-}
+// ExitLeaks : leaks are present in scanned repos
+const ExitLeaks = 2
+
+// package globals
+var (
+	regexes       map[string]*regexp.Regexp
+	stopWords     []string
+	base64Chars   string
+	hexChars      string
+	assignRegex   *regexp.Regexp
+	fileDiffRegex *regexp.Regexp
+	opts          *Options
+	pwd           string
+)
 
 func init() {
 	base64Chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
 	hexChars = "1234567890abcdefABCDEF"
-
 	stopWords = []string{"setting", "info", "env", "environment"}
+	fileDiffRegex = regexp.MustCompile("diff --git a.+b/")
+	assignRegex = regexp.MustCompile(`(=|:|:=|<-)`)
 
+	// TODO Externalize regex... this is tricky making it yml compliant
 	regexes = map[string]*regexp.Regexp{
 		"PKCS8":    regexp.MustCompile("-----BEGIN PRIVATE KEY-----"),
 		"RSA":      regexp.MustCompile("-----BEGIN RSA PRIVATE KEY-----"),
@@ -59,164 +46,17 @@ func init() {
 		"AWS":      regexp.MustCompile("AKIA[0-9A-Z]{16}"),
 		"Reddit":   regexp.MustCompile("(?i)reddit.*['\"][0-9a-zA-Z]{14}['\"]"),
 		"Heroku":   regexp.MustCompile("(?i)heroku.*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}"),
-		// "Custom": regexp.MustCompile(".*")
-	}
-	assignRegex = regexp.MustCompile(`(=|:|:=|<-)`)
-	fileDiffRegex = regexp.MustCompile("diff --git a.+b/")
-	homeDir, err := homedir.Dir()
-	if err != nil {
-		log.Fatal("Cant find home dir")
-	}
-
-	gitLeaksPath = filepath.Join(homeDir, ".gitleaks")
-	if _, err := os.Stat(gitLeaksPath); os.IsNotExist(err) {
-		os.Mkdir(gitLeaksPath, os.ModePerm)
-	}
-	gitLeaksClonePath = filepath.Join(gitLeaksPath, "clones")
-	if _, err := os.Stat(gitLeaksClonePath); os.IsNotExist(err) {
-		os.Mkdir(gitLeaksClonePath, os.ModePerm)
-	}
-	gitLeaksReportPath = filepath.Join(gitLeaksPath, "report")
-	if _, err := os.Stat(gitLeaksReportPath); os.IsNotExist(err) {
-		os.Mkdir(gitLeaksReportPath, os.ModePerm)
-	}
-}
-
-// getOwner
-func getOwner(opts *Options) *Owner {
-	var owner Owner
-	if opts.RepoURL != "" {
-		splitSlashes := strings.Split(opts.RepoURL, "/")
-		owner = Owner{
-			name:        splitSlashes[len(splitSlashes)-2],
-			url:         opts.RepoURL,
-			accountType: "users",
-		}
-
-	} else if opts.UserURL != "" {
-		_, ownerName := path.Split(opts.UserURL)
-		owner = Owner{
-			name:        ownerName,
-			url:         opts.UserURL,
-			accountType: "user",
-		}
-	} else if opts.OrgURL != "" {
-		_, ownerName := path.Split(opts.OrgURL)
-		owner = Owner{
-			name:        ownerName,
-			url:         opts.OrgURL,
-			accountType: "org",
-		}
 	}
-
-	if opts.Tmp {
-		dir, err := ioutil.TempDir("", owner.name)
-		if err != nil {
-			log.Fatal("Cant make temp dir")
-		}
-		owner.path = dir
-	} else {
-		owner.path = filepath.Join(gitLeaksClonePath, owner.name)
-		if _, err := os.Stat(owner.path); os.IsNotExist(err) {
-			os.Mkdir(owner.path, os.ModePerm)
-		}
-	}
-	owner.reportPath = filepath.Join(gitLeaksPath, "report", owner.name)
-	return &owner
-}
-
-// getRepos
-func getRepos(opts *Options, owner *Owner) []RepoDesc {
-	var (
-		allRepos  []*github.Repository
-		repos     []*github.Repository
-		repoDescs []RepoDesc
-		resp      *github.Response
-		ctx       = context.Background()
-		err       error
-	)
-	if opts.RepoURL != "" {
-		_, repoName := path.Split(opts.RepoURL)
-		if strings.HasSuffix(repoName, ".git") {
-			repoName = repoName[:len(repoName)-4]
-		}
-		ownerPath := filepath.Join(owner.path, repoName)
-		repo := RepoDesc{
-			name:  repoName,
-			url:   opts.RepoURL,
-			owner: owner,
-			path:  ownerPath}
-		repoDescs = append(repoDescs, repo)
-		return repoDescs
-	}
-
-	tokenClient := getAccessToken(opts)
-	gitClient := github.NewClient(tokenClient)
-
-	// TODO include fork check
-	orgOpt := &github.RepositoryListByOrgOptions{
-		ListOptions: github.ListOptions{PerPage: 10},
-	}
-	userOpt := &github.RepositoryListOptions{
-		ListOptions: github.ListOptions{PerPage: 10},
-	}
-
-	for {
-		if opts.UserURL != "" {
-			repos, resp, err = gitClient.Repositories.List(
-				ctx, owner.name, userOpt)
-		} else if opts.OrgURL != "" {
-			repos, resp, err = gitClient.Repositories.ListByOrg(
-				ctx, owner.name, orgOpt)
-		}
-		allRepos = append(allRepos, repos...)
-		if resp.NextPage == 0 || err != nil {
-			break
-		}
-
-		for _, repo := range repos {
-			repoPath := filepath.Join(owner.path, *repo.Name)
-			repoDescs = append(repoDescs,
-				RepoDesc{
-					name:  *repo.Name,
-					url:   *repo.CloneURL,
-					owner: owner,
-					path:  repoPath})
-		}
-
-		orgOpt.Page = resp.NextPage
-		userOpt.Page = resp.NextPage
-	}
-
-	return repoDescs
-}
-
-// getAccessToken checks
-// 1. option
-// 2. env var
-// TODO. $HOME/.gitleaks/.creds
-func getAccessToken(opts *Options) *http.Client {
-	var token string
-	if opts.Token != "" {
-		token = opts.Token
-	} else {
-		token = os.Getenv("GITHUB_TOKEN")
-	}
-	if token == "" {
-		return nil
-	}
-
-	tokenService := oauth2.StaticTokenSource(
-		&oauth2.Token{AccessToken: token},
-	)
-	tokenClient := oauth2.NewClient(context.Background(), tokenService)
-	return tokenClient
 }
 
 func main() {
 	args := os.Args[1:]
-	opts := parseOptions(args)
-	owner := getOwner(opts)
-	repos := getRepos(opts, owner)
-	start(repos, owner, opts)
+	opts = newOpts(args)
+	owner := newOwner()
+	os.Exit(owner.auditRepos())
+}
+
+func failF(format string, args ...interface{}) {
+	fmt.Fprintf(os.Stderr, format, args...)
+	os.Exit(ExitFailure)
 }

+ 189 - 62
options.go

@@ -3,53 +3,64 @@ package main
 import (
 	"fmt"
 	"os"
+	"path/filepath"
+	"regexp"
 	"strconv"
+	"strings"
 )
 
-const usage = `usage: gitleaks [options] <url>
+const usage = `
+usage: gitleaks [options] <URL>/<path_to_repo>
 
 Options:
- -c --concurrency 	Upper bound on concurrent diffs
- -u --user 		Git user url
- -r --repo 		Git repo url
- -o --org 		Git organization url
- -s --since 		Commit to stop at
- -b --b64Entropy 	Base64 entropy cutoff (default is 70)
- -x --hexEntropy  	Hex entropy cutoff (default is 40)
+ -u --user 		Git user mode
+ -r --repo 		Git repo mode
+ -o --org 		Git organization mode
+ -l --local 		Local mode, gitleaks will look for local repo in <path>
+ -v --verbose 		Verbose mode, will output leaks as gitleaks finds them
+ --report-path=<STR> 	Report output, default $GITLEAKS_HOME/report
+ --clone-path=<STR>	Gitleaks will clone repos here, default $GITLEAKS_HOME/clones
+ -t --temp 		Clone to temporary directory
+ --concurrency=<INT> 	Upper bound on concurrent diffs
+ --since=<STR> 		Commit to stop at
+ --b64Entropy=<INT> 	Base64 entropy cutoff (default is 70)
+ --hexEntropy=<INT>  	Hex entropy cutoff (default is 40)
  -e --entropy		Enable entropy		
- -j --json 		Output gitleaks report
  -h --help 		Display this message
- --token    		Github API token
- --strict 		Enables stopwords
+ --token=<STR>    	Github API token
+ --stopwords  		Enables stopwords
+
 `
 
-// Options for gitleaks
+// Options for gitleaks. need to support remote repo/owner
+// and local repo/owner mode
 type Options struct {
+	URL      string
+	RepoPath string
+	ReportPath string
+	ClonePath  string
 	Concurrency      int
 	B64EntropyCutoff int
 	HexEntropyCutoff int
-	UserURL          string
-	OrgURL           string
-	RepoURL          string
-	Strict           bool
-	Entropy          bool
-	SinceCommit      string
-	Persist          bool
-	IncludeForks     bool
-	Tmp              bool
-	EnableJSON       bool
-	Token            string
-	Verbose          bool
+	UserMode  bool
+	OrgMode   bool
+	RepoMode  bool
+	LocalMode bool
+	Strict       bool
+	Entropy      bool
+	SinceCommit  string
+	Tmp          bool
+	Token        string
+	Verbose  bool
 }
 
 // help prints the usage string and exits
 func help() {
 	os.Stderr.WriteString(usage)
-	os.Exit(1)
 }
 
 // optionsNextInt is a parseOptions helper that returns the value (int) of an option if valid
-func optionsNextInt(args []string, i *int) int {
+func (opts *Options) nextInt(args []string, i *int) int {
 	if len(args) > *i+1 {
 		*i++
 	} else {
@@ -57,80 +68,196 @@ func optionsNextInt(args []string, i *int) int {
 	}
 	argInt, err := strconv.Atoi(args[*i])
 	if err != nil {
-		fmt.Printf("Invalid %s option: %s\n", args[*i-1], args[*i])
-		help()
+		opts.failF("Invalid %s option: %s\n", args[*i-1], args[*i])
 	}
 	return argInt
 }
 
 // optionsNextString is a parseOptions helper that returns the value (string) of an option if valid
-func optionsNextString(args []string, i *int) string {
+func (opts *Options) nextString(args []string, i *int) string {
 	if len(args) > *i+1 {
 		*i++
 	} else {
-		fmt.Printf("Invalid %s option: %s\n", args[*i-1], args[*i])
-		help()
+		opts.failF("Invalid %s option: %s\n", args[*i-1], args[*i])
 	}
 	return args[*i]
 }
 
-// parseOptions
-func parseOptions(args []string) *Options {
-	opts := &Options{
+// optString reports whether arg starts with one of prefixes and, if so, returns the rest of arg after that prefix
+func (opts *Options) optString(arg string, prefixes ...string) (bool, string) {
+	for _, prefix := range prefixes {
+		if strings.HasPrefix(arg, prefix) {
+			return true, arg[len(prefix):]
+		}
+	}
+	return false, ""
+}
+
+// optInt reports whether arg starts with one of prefixes and, if so, returns the rest of arg parsed as an int
+func (opts *Options) optInt(arg string, prefixes ...string) (bool, int) {
+	for _, prefix := range prefixes {
+		if strings.HasPrefix(arg, prefix) {
+			i, err := strconv.Atoi(arg[len(prefix):])
+			if err != nil {
+				opts.failF("Invalid %s int option\n", prefix)
+			}
+			return true, i
+		}
+	}
+	return false, 0
+}
+
+// newOpts generates opts and parses arguments
+func newOpts(args []string) *Options {
+	opts, err := defaultOptions()
+	if err != nil {
+		opts.failF("%v", err)
+	}
+	err = opts.parseOptions(args)
+	if err != nil {
+		opts.failF("%v", err)
+	}
+	return opts
+}
+
+// defaultOptions provides the default options
+func defaultOptions() (*Options, error) {
+	return &Options{
 		Concurrency:      10,
 		B64EntropyCutoff: 70,
 		HexEntropyCutoff: 40,
-	}
+	}, nil
+}
+
+// parseOptions
+func (opts *Options) parseOptions(args []string) error {
 
 	if len(args) == 0 {
-		help()
+		opts.LocalMode = true
+		opts.RepoPath, _ = os.Getwd()
 	}
 
 	for i := 0; i < len(args); i++ {
 		arg := args[i]
 		switch arg {
-		case "-s", "--since":
-			opts.SinceCommit = optionsNextString(args, &i)
-		case "--strict":
+		case "--stopwords":
 			opts.Strict = true
-		case "-b", "--b64Entropy":
-			opts.B64EntropyCutoff = optionsNextInt(args, &i)
-		case "-x", "--hexEntropy":
-			opts.HexEntropyCutoff = optionsNextInt(args, &i)
 		case "-e", "--entropy":
 			opts.Entropy = true
-		case "-c", "--concurrency":
-			opts.Concurrency = optionsNextInt(args, &i)
 		case "-o", "--org":
-			opts.OrgURL = optionsNextString(args, &i)
+			opts.OrgMode = true
 		case "-u", "--user":
-			opts.UserURL = optionsNextString(args, &i)
+			opts.UserMode = true
 		case "-r", "--repo":
-			opts.RepoURL = optionsNextString(args, &i)
-		case "-t", "--temporary":
+			opts.RepoMode = true
+		case "-l", "--local":
+			opts.LocalMode = true
+		case "-v", "--verbose":
+			opts.Verbose = true
+		case "-t", "--temp":
 			opts.Tmp = true
-		case "--token":
-			opts.Token = optionsNextString(args, &i)
-		case "-j", "--json":
-			opts.EnableJSON = true
 		case "-h", "--help":
 			help()
-			return nil
+			os.Exit(ExitClean)
 		default:
-			if i == len(args)-1 && opts.OrgURL == "" && opts.RepoURL == "" &&
-				opts.UserURL == "" {
-				opts.RepoURL = arg
+			if match, value := opts.optString(arg, "--token="); match {
+				opts.Token = value
+			} else if match, value := opts.optString(arg, "--since="); match {
+				opts.SinceCommit = value
+			} else if match, value := opts.optString(arg, "--report-path="); match {
+				opts.ReportPath = value
+			} else if match, value := opts.optString(arg, "--clone-path="); match {
+				opts.ClonePath = value
+			} else if match, value := opts.optInt(arg, "--b64Entropy="); match {
+				opts.B64EntropyCutoff = value
+			} else if match, value := opts.optInt(arg, "--hexEntropy="); match {
+				opts.HexEntropyCutoff = value
+			} else if match, value := opts.optInt(arg, "--concurrency="); match {
+				opts.Concurrency = value
+			} else if i == len(args)-1 {
+				fmt.Println(args[i])
+				if opts.LocalMode {
+					opts.RepoPath = filepath.Clean(args[i])
+				} else {
+					if isGithubTarget(args[i]) {
+						opts.URL = args[i]
+					} else {
+						help()
+						return fmt.Errorf("Unknown option %s\n", arg)
+					}
+				}
 			} else {
-				fmt.Printf("Unknown option %s\n\n", arg)
 				help()
+				return fmt.Errorf("Unknown option %s\n", arg)
 			}
 		}
 	}
 
-	// "guards"
-	if opts.Tmp && opts.EnableJSON {
-		fmt.Println("Report generation with temporary clones not supported")
+	// TODO cleanup this logic
+	if !opts.RepoMode && !opts.UserMode && !opts.OrgMode && !opts.LocalMode {
+		if opts.URL != "" {
+			opts.RepoMode = true
+			err := opts.guards()
+			if err != nil{
+				return err
+			}
+			return nil
+		}
+
+		pwd, _ = os.Getwd()
+		// check if pwd contains a .git, if it does, run local mode
+		dotGitPath := filepath.Join(pwd, ".git")
+
+		if _, err := os.Stat(dotGitPath); os.IsNotExist(err) {
+			return fmt.Errorf("gitleaks has no target: %v", err)
+		} else {
+			opts.LocalMode = true
+			opts.RepoPath = pwd
+			opts.RepoMode = false
+		}
 	}
 
-	return opts
+	err := opts.guards()
+	if err != nil{
+		return err
+	}
+	return err
+}
+
+// failF prints a failure message out to stderr, displays help
+// and exits with exit code 1 (ExitFailure)
+func (opts *Options) failF(format string, args ...interface{}) {
+	fmt.Fprintf(os.Stderr, format, args...)
+	help()
+	os.Exit(ExitFailure)
+}
+
+// guards will prevent gitleaks from continuing if any invalid options
+// are found.
+func (opts *Options) guards() error {
+	if (opts.RepoMode || opts.OrgMode || opts.UserMode) && opts.LocalMode {
+		return fmt.Errorf("Cannot run Gitleaks on repo/user/org mode and local mode\n")
+	} else if (opts.RepoMode || opts.OrgMode || opts.UserMode) && !isGithubTarget(opts.URL) {
+		return fmt.Errorf("Not valid github target %s\n", opts.URL)
+	} else if (opts.RepoMode || opts.UserMode) && opts.OrgMode {
+		return fmt.Errorf("Cannot run Gitleaks on more than one mode\n")
+	} else if (opts.OrgMode || opts.UserMode) && opts.RepoMode {
+		return fmt.Errorf("Cannot run Gitleaks on more than one mode\n")
+	} else if (opts.OrgMode || opts.RepoMode) && opts.UserMode {
+		return fmt.Errorf("Cannot run Gitleaks on more than one mode\n")
+	} else if opts.LocalMode && opts.Tmp {
+		return fmt.Errorf("Cannot run Gitleaks with temp settings and local mode\n")
+	} else if opts.SinceCommit != "" && (opts.OrgMode || opts.UserMode) {
+		return fmt.Errorf("Cannot run Gitleaks with since commit flag and a owner mode\n")
+	} else if opts.ClonePath != "" && opts.Tmp {
+		return fmt.Errorf("Cannot run Gitleaks with --clone-path set and temporary repo\n")
+	}
+
+	return nil
+}
+
+// isGithubTarget checks if url is a valid github target
+func isGithubTarget(url string) bool {
+	re := regexp.MustCompile("github.com")
+	return re.MatchString(url)
 }

+ 88 - 0
options_test.go

@@ -0,0 +1,88 @@
+package main
+
+import (
+	"testing"
+)
+
+// TestNextInt checks that nextInt returns the integer that follows a flag.
+func TestNextInt(t *testing.T) {
+	opts, err := defaultOptions()
+	if err != nil {
+		// nothing below is meaningful without usable options
+		t.Fatalf("defaultOptions failed: %v", err)
+	}
+	args := []string{"-c", "10"}
+	i := 0
+	if n := opts.nextInt(args, &i); n != 10 {
+		t.Errorf("nextInt(%v) = %v, want 10", args, n)
+	}
+}
+
+// TestNextString checks that nextString returns the value that follows a flag.
+func TestNextString(t *testing.T) {
+	opts, err := defaultOptions()
+	if err != nil {
+		// nothing below is meaningful without usable options
+		t.Fatalf("defaultOptions failed: %v", err)
+	}
+	args := []string{"--fake", "flag"}
+	i := 0
+	if s := opts.nextString(args, &i); s != "flag" {
+		t.Errorf("nextString(%v) = %q, want %q", args, s, "flag")
+	}
+}
+
+// TestOptString checks that optString recognises a "--name=value" argument
+// and returns the value part.
+func TestOptString(t *testing.T) {
+	opts, err := defaultOptions()
+	if err != nil {
+		// nothing below is meaningful without usable options
+		t.Fatalf("defaultOptions failed: %v", err)
+	}
+	match, s := opts.optString("--fake=flag", "--fake=")
+	if !match || s != "flag" {
+		t.Errorf("optString = (%v, %q), want (true, %q)", match, s, "flag")
+	}
+}
+
+// TestOptInt checks that optInt recognises a "--name=value" argument and
+// returns the value parsed as an integer.
+func TestOptInt(t *testing.T) {
+	opts, err := defaultOptions()
+	if err != nil {
+		// nothing below is meaningful without usable options
+		t.Fatalf("defaultOptions failed: %v", err)
+	}
+	match, n := opts.optInt("--fake=10", "--fake=")
+	if !match || n != 10 {
+		t.Errorf("optInt = (%v, %v), want (true, 10)", match, n)
+	}
+}
+
+// TestParseOptions exercises guards with one conflicting and one valid
+// combination of modes.
+func TestParseOptions(t *testing.T) {
+	opts, err := defaultOptions()
+	if err != nil {
+		// check the error before touching opts
+		t.Fatalf("defaultOptions failed: %v", err)
+	}
+	opts.URL = "github.com/sample"
+
+	// a remote mode combined with local mode must be rejected
+	opts.RepoMode = false
+	opts.UserMode = true
+	opts.LocalMode = true
+	if err = opts.guards(); err == nil {
+		t.Error("guards accepted user mode together with local mode")
+	}
+
+	// plain repo mode against a github url must be accepted
+	opts.RepoMode = true
+	opts.UserMode = false
+	opts.LocalMode = false
+	if err = opts.guards(); err != nil {
+		t.Errorf("guards rejected plain repo mode: %v", err)
+	}
+}
+
+// TestGithubTarget checks that the common spellings of a github remote are
+// all accepted by isGithubTarget.
+func TestGithubTarget(t *testing.T) {
+	targets := []string{
+		"github.com",
+		"https://github.com/",
+		"git@github.com:zricethezav/gitleaks.git",
+	}
+	for _, target := range targets {
+		if !isGithubTarget(target) {
+			t.Errorf("isGithubTarget(%q) = false, want true", target)
+		}
+	}
+}

+ 275 - 0
owner.go

@@ -0,0 +1,275 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"github.com/google/go-github/github"
+	"golang.org/x/oauth2"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"os"
+	"os/signal"
+	"path"
+	"strings"
+)
+
+// Owner describes the target of an audit run (a github user, a github org,
+// or the implicit owner of a single/local repo) together with the repos
+// that will be audited for it.
+type Owner struct {
+	name        string // owner name as returned by ownerName
+	url         string // copy of opts.URL
+	accountType string // "org", "user", or "" for a single repo (see ownerType)
+	path        string // directory returned by ownerPath
+	reportPath  string // NOTE(review): not assigned by newOwner in this file
+	repos       []Repo // repos collected by newOwner / fetchRepos
+}
+
+// ownerPath is used by newOwner and returns the directory repos are cloned
+// into:
+//   - with the temporary option set, a fresh directory created by
+//     ioutil.TempDir whose name is prefixed with ownerName;
+//   - with --clone-path set, that path, which is created (including any
+//     missing parents) when it does not exist yet;
+//   - otherwise the current working directory.
+//
+// An error is returned when the directory cannot be created or the working
+// directory cannot be determined.
+func ownerPath(ownerName string) (string, error) {
+	if opts.Tmp {
+		return ioutil.TempDir("", ownerName)
+	}
+	if opts.ClonePath != "" {
+		// MkdirAll is a no-op for an existing directory and reports the
+		// failure instead of letting a later clone fail obscurely.
+		if err := os.MkdirAll(opts.ClonePath, os.ModePerm); err != nil {
+			return "", err
+		}
+		return opts.ClonePath, nil
+	}
+	return os.Getwd()
+}
+
+// newOwner builds the Owner for the current run from the parsed options.
+// In local mode the owner holds the single repo found at opts.RepoPath.
+// Otherwise the repos are collected by fetchRepos, which either registers
+// the single repo named by opts.URL or asks github for all repos of the
+// user/org. Any failure terminates the process.
+func newOwner() *Owner {
+	name := ownerName()
+	dir, err := ownerPath(name)
+	if err != nil {
+		failF("%v", err)
+	}
+	owner := &Owner{
+		name:        name,
+		url:         opts.URL,
+		accountType: ownerType(),
+		path:        dir,
+	}
+
+	// On ctrl-c, remove the temporary clone directory (when in temporary
+	// mode) and exit. A clone that is still running is not stopped
+	// gracefully.
+	sigC := make(chan os.Signal, 1)
+	signal.Notify(sigC, os.Interrupt)
+	go func() {
+		<-sigC
+		if opts.Tmp {
+			owner.rmTmp()
+		}
+		os.Exit(ExitFailure)
+	}()
+
+	// a local repo needs no fetching
+	if opts.LocalMode {
+		owner.repos = append(owner.repos, *newLocalRepo(opts.RepoPath))
+		return owner
+	}
+
+	if err = owner.fetchRepos(); err != nil {
+		owner.failF("%v", err)
+	}
+	return owner
+}
+
+// fetchRepos is used by newOwner and is responsible for fetching one or more
+// of the owner's repos. If opts.RepoURL is not the empty string then fetchRepos will
+// only grab the repo specified in opts.RepoURL. Otherwise, fetchRepos will reach out to
+// github's api and grab all repos associated with owner.
+func (owner *Owner) fetchRepos() error {
+	var err error
+	ctx := context.Background()
+	if owner.accountType == "" {
+		// single repo, ambiguous account type
+		_, repoName := path.Split(opts.URL)
+		repo := newRepo(repoName, opts.URL, owner.path+"/"+repoName)
+		owner.repos = append(owner.repos, *repo)
+	} else {
+		// org or user account type, would fail if not valid before
+		tokenClient := githubTokenClient()
+		gitClient := github.NewClient(tokenClient)
+
+		if owner.accountType == "org" {
+			// org account type
+			orgOpt := &github.RepositoryListByOrgOptions{
+				ListOptions: github.ListOptions{PerPage: 10},
+			}
+			err = owner.fetchOrgRepos(ctx, orgOpt, gitClient)
+		} else {
+			// user account type
+			userOpt := &github.RepositoryListOptions{
+				ListOptions: github.ListOptions{PerPage: 10},
+			}
+			err = owner.fetchUserRepos(ctx, userOpt, gitClient)
+		}
+	}
+	return err
+}
+
+// fetchOrgRepos used by fetchRepos is responsible for parsing github's org repo response. If no
+// github token is available then fetchOrgRepos might run into a rate limit in which case owner will
+// log an error and gitleaks will exit. The rate limit for no token is 50 req/hour... not much.
+func (owner *Owner) fetchOrgRepos(ctx context.Context, orgOpts *github.RepositoryListByOrgOptions,
+	gitClient *github.Client) error {
+	var (
+		githubRepos []*github.Repository
+		resp        *github.Response
+		err         error
+	)
+
+	for {
+		githubRepos, resp, err = gitClient.Repositories.ListByOrg(
+			ctx, owner.name, orgOpts)
+		owner.addRepos(githubRepos)
+		if _, ok := err.(*github.RateLimitError); ok {
+			log.Printf("hit rate limit retreiving %s, continuing with partial audit\n",
+				owner.name)
+		} else if err != nil {
+			return fmt.Errorf("failed obtaining %s repos from githuib api, bad request", owner.name)
+		} else if resp.NextPage == 0 {
+			break
+		}
+		orgOpts.Page = resp.NextPage
+	}
+	return nil
+}
+
+// fetchUserRepos used by fetchRepos is responsible for parsing github's user repo response. If no
+// github token is available then fetchUserRepos might run into a rate limit in which case owner will
+// log an error and gitleaks will exit. The rate limit for no token is 50 req/hour... not much.
+// sorry for the redundancy
+func (owner *Owner) fetchUserRepos(ctx context.Context, userOpts *github.RepositoryListOptions,
+	gitClient *github.Client) error {
+	var (
+		githubRepos []*github.Repository
+		resp        *github.Response
+		err         error
+	)
+	for {
+		githubRepos, resp, err = gitClient.Repositories.List(
+			ctx, owner.name, userOpts)
+		owner.addRepos(githubRepos)
+		if _, ok := err.(*github.RateLimitError); ok {
+			log.Printf("hit rate limit retreiving %s, continuing with partial audit\n",
+				owner.name)
+			break
+		} else if err != nil {
+			return fmt.Errorf("failed obtaining %s repos from github api, bad request", owner.name)
+		} else if resp.NextPage == 0 {
+			break
+		}
+		userOpts.Page = resp.NextPage
+	}
+	return nil
+}
+
+// addRepos is used by fetchUserRepos and fetchOrgRepos and appends one Repo
+// per entry of a github org/user response. Each repo is placed under
+// owner.path in a directory named after the repo. Entries without a name or
+// clone url are skipped because they cannot be cloned.
+func (owner *Owner) addRepos(githubRepos []*github.Repository) {
+	for _, repo := range githubRepos {
+		// Name and CloneURL are pointers; guard against nil before
+		// dereferencing them
+		if repo == nil || repo.Name == nil || repo.CloneURL == nil {
+			continue
+		}
+		owner.repos = append(owner.repos, *newRepo(*repo.Name, *repo.CloneURL, owner.path+"/"+*repo.Name))
+	}
+}
+
+// auditRepos audits every repo of the owner in order and is used by main.
+// It returns ExitLeaks when at least one repo reported leaks and ExitClean
+// otherwise. An audit error terminates the process through failF. In
+// temporary mode the owner's directory is removed once all audits are done.
+func (owner *Owner) auditRepos() int {
+	code := ExitClean
+	for i := range owner.repos {
+		// audit a copy of the element, as ranging by value would
+		repo := owner.repos[i]
+		found, err := repo.audit()
+		if err != nil {
+			failF("%v", err)
+		}
+		if found {
+			code = ExitLeaks
+		}
+	}
+	if opts.Tmp {
+		owner.rmTmp()
+	}
+	return code
+}
+
+// failF formats the given message, writes it to stderr and terminates the
+// process with ExitFailure.
+func (owner *Owner) failF(format string, args ...interface{}) {
+	msg := fmt.Sprintf(format, args...)
+	fmt.Fprint(os.Stderr, msg)
+	os.Exit(ExitFailure)
+}
+
+// rmTmp removes the owner's directory and everything below it. Its callers
+// in this file (the interrupt handler in newOwner and auditRepos) only
+// invoke it in temporary mode. A failed removal is logged instead of being
+// dropped silently.
+func (owner *Owner) rmTmp() {
+	log.Printf("removing tmp gitleaks repo for %s\n", owner.path)
+	if err := os.RemoveAll(owner.path); err != nil {
+		log.Printf("could not remove %s: %v\n", owner.path, err)
+	}
+}
+
+// ownerType maps the mode flags in opts to an account type: "org" in org
+// mode, "user" in user mode, and the empty string otherwise, which the
+// rest of the file treats as "single repo, owner type unknown".
+func ownerType() string {
+	switch {
+	case opts.OrgMode:
+		return "org"
+	case opts.UserMode:
+		return "user"
+	default:
+		return ""
+	}
+}
+
+// ownerName extracts the owner name from opts.URL. In repo mode it is the
+// path element before the repo name; in user/org mode it is the last path
+// element. Trailing slashes are ignored. The empty string is returned for
+// local runs and for urls that carry no owner element.
+func ownerName() string {
+	// a trailing slash would otherwise shift every element by one
+	target := strings.TrimRight(opts.URL, "/")
+	if opts.RepoMode {
+		segments := strings.Split(target, "/")
+		if len(segments) < 2 {
+			// no owner element; indexing len-2 would panic
+			return ""
+		}
+		name := segments[len(segments)-2]
+		// scp-style remotes ("git@github.com:owner/repo.git") carry the
+		// host in the same element as the owner
+		if i := strings.LastIndex(name, ":"); i >= 0 {
+			name = name[i+1:]
+		}
+		return name
+	} else if opts.UserMode || opts.OrgMode {
+		_, name := path.Split(target)
+		return name
+	}
+	// local repo
+	return ""
+}
+
+// githubTokenClient creates an oauth client from your github access token.
+// Gitleaks will attempt to retrieve your github access token from a cli argument
+// or an env var - "GITHUB_TOKEN".
+func githubTokenClient() *http.Client {
+	var token string
+	if opts.Token != "" {
+		token = opts.Token
+	} else {
+		token = os.Getenv("GITHUB_TOKEN")
+	}
+	if token == "" {
+		return nil
+	}
+
+	tokenService := oauth2.StaticTokenSource(
+		&oauth2.Token{AccessToken: token},
+	)
+	tokenClient := oauth2.NewClient(context.Background(), tokenService)
+	return tokenClient
+}

+ 35 - 0
owner_test.go

@@ -0,0 +1,35 @@
+package main
+
+import (
+	"testing"
+	"os"
+)
+
+// TestOwnerPath checks that ownerPath falls back to the working directory
+// and honours an explicit clone path.
+func TestOwnerPath(t *testing.T) {
+	opts, _ = defaultOptions()
+	p, err := ownerPath("testName")
+	if err != nil {
+		t.Fatalf("ownerPath with default options failed: %v", err)
+	}
+	pwd, _ := os.Getwd()
+	if pwd != p {
+		t.Errorf("ownerPath = %q, want working directory %q", p, pwd)
+	}
+
+	// ownerPath creates the clone path when it is missing; remove it again
+	// afterwards, but only if this test is the one that created it.
+	if _, statErr := os.Stat("test"); os.IsNotExist(statErr) {
+		defer os.RemoveAll("test")
+	}
+	opts.ClonePath = "test"
+	p, err = ownerPath("nameToIgnore")
+	if err != nil {
+		t.Errorf("ownerPath with clone path failed: %v", err)
+	}
+	if p != "test" {
+		t.Errorf("ownerPath = %q, want %q", p, "test")
+	}
+}
+
+// TestNewOwner checks that an owner built from the default options is
+// rooted at the working directory.
+func TestNewOwner(t *testing.T) {
+	opts, _ = defaultOptions()
+	owner := newOwner()
+
+	// default options will assume gitleaks is
+	// running on local mode
+	pwd, _ := os.Getwd()
+	if pwd != owner.path {
+		t.Errorf("owner.path = %q, want working directory %q", owner.path, pwd)
+	}
+}

+ 258 - 0
repo.go

@@ -0,0 +1,258 @@
+package main
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"os"
+	"os/exec"
+	"path"
+	"path/filepath"
+	"sync"
+)
+
+// Repo describes a single git repository to audit: where it comes from,
+// where it lives on disk and where its report is written.
+type Repo struct {
+	name       string // repo name; also used for the report file name
+	url        string // clone url; left empty by newLocalRepo
+	path       string // directory of the working copy
+	status     string // TODO
+	leaks      []Leak // NOTE(review): not populated by audit in this file
+	reportPath string // copy of opts.ReportPath taken at construction
+}
+
+// Leak is a single finding. It is marshalled to JSON both for verbose
+// output (reportAggregator) and for the report file (writeReport); the
+// struct tags define the JSON keys. Note that Offender is emitted under
+// the key "string".
+type Leak struct {
+	Line     string `json:"line"`
+	Commit   string `json:"commit"`
+	Offender string `json:"string"`
+	Reason   string `json:"reason"`
+	Msg      string `json:"commitMsg"`
+	Time     string `json:"time"`
+	Author   string `json:"author"`
+	File     string `json:"file"`
+	RepoURL  string `json:"repoURL"`
+}
+
+// Commit holds the fields parseRevList extracts for one commit from the
+// `git rev-list` output requested with --format=%H%n%an%n%s%n%ci.
+type Commit struct {
+	Hash   string // %H
+	Author string // %an
+	Time   string // %ci
+	Msg    string // %s
+}
+
+// newLocalRepo returns a Repo for a working copy that already exists on
+// disk at repoPath. The repo name is the last element of repoPath, the url
+// stays empty and the report path is taken from opts.ReportPath.
+func newLocalRepo(repoPath string) *Repo {
+	// repoPath is a filesystem path, so split it with the OS-aware
+	// filepath package rather than the slash-only path package
+	_, name := filepath.Split(repoPath)
+	return &Repo{
+		name:       name,
+		path:       repoPath,
+		reportPath: opts.ReportPath,
+	}
+}
+
+// newRepo returns a Repo with the given name, clone url and on-disk path.
+// The report path is taken from opts.ReportPath.
+func newRepo(name string, url string, path string) *Repo {
+	return &Repo{
+		name:       name,
+		url:        url,
+		path:       path,
+		reportPath: opts.ReportPath,
+	}
+}
+
+// rmTmp logs and then attempts to remove the repo's directory. It is
+// deferred by audit when temporary mode is set.
+// NOTE(review): os.Remove only deletes files and empty directories and its
+// error is discarded here, so a cloned (non-empty) repo is left in place;
+// the visible cleanup of the whole tree is Owner.rmTmp. Confirm this is
+// intended before switching to os.RemoveAll, since audit changes the
+// process working directory into repo.path.
+func (repo *Repo) rmTmp() {
+	log.Printf("removing tmp gitleaks repo %s\n", repo.path)
+	os.Remove(repo.path)
+}
+
+// audit operates on a single repo and searches the full or partial history
+// of the repo. The repo is cloned when no .git directory exists at
+// repo.path (an error in local mode) and fetched otherwise. Every commit
+// listed by `git rev-list` is then handed to auditDiff in its own
+// goroutine, up to and including opts.SinceCommit when that is set.
+//
+// A semaphore sized by opts.Concurrency bounds the number of concurrent
+// diffs; if unbounded, the system will throw a `too many open files` error.
+// Two WaitGroups are used: commitWG tracks the auditDiff goroutines and
+// gitLeakReceiverWG tracks leaks that were sent but not yet collected by
+// reportAggregator.
+//
+// audit returns whether any leaks were found. When leaks were found and
+// opts.ReportPath is set, a report is written through writeReport.
+// Note that audit changes the working directory of the whole process to
+// repo.path.
+func (repo *Repo) audit() (bool, error) {
+	var (
+		out               []byte
+		err               error
+		commitWG          sync.WaitGroup
+		gitLeakReceiverWG sync.WaitGroup
+		gitLeaksChan      = make(chan Leak)
+		leaks             []Leak
+		semaphoreChan     = make(chan struct{}, opts.Concurrency)
+		leaksPst          bool
+	)
+
+	if opts.Tmp {
+		defer repo.rmTmp()
+	}
+
+	dotGitPath := filepath.Join(repo.path, ".git")
+
+	// Clone repo if not present, otherwise fetch for new changes.
+	if _, statErr := os.Stat(dotGitPath); os.IsNotExist(statErr) {
+		if opts.LocalMode {
+			return false, fmt.Errorf("%s does not exist", repo.path)
+		}
+		// no repo present, clone it
+		log.Printf("cloning \x1b[37;1m%s\x1b[0m into %s...\n", repo.url, repo.path)
+		if err = exec.Command("git", "clone", repo.url, repo.path).Run(); err != nil {
+			return false, fmt.Errorf("cannot clone %s into %s", repo.url, repo.path)
+		}
+	} else {
+		log.Printf("fetching \x1b[37;1m%s\x1b[0m from %s ...\n", repo.name, repo.path)
+		// run the fetch inside the repo; the process has not changed
+		// into repo.path yet at this point
+		fetch := exec.Command("git", "fetch")
+		fetch.Dir = repo.path
+		if err = fetch.Run(); err != nil {
+			return false, fmt.Errorf("cannot fetch %s from %s", repo.url, repo.path)
+		}
+	}
+
+	// repo.path is data, not a format string, so it is passed as is
+	if err = os.Chdir(repo.path); err != nil {
+		return false, fmt.Errorf("cannot navigate to %s", repo.path)
+	}
+
+	gitFormat := "--format=%H%n%an%n%s%n%ci"
+	out, err = exec.Command("git", "rev-list", "--all",
+		"--remotes", "--topo-order", gitFormat).Output()
+	if err != nil {
+		return false, fmt.Errorf("could not retreive rev-list from %s", repo.name)
+	}
+
+	revListLines := bytes.Split(out, []byte("\n"))
+	commits := parseRevList(revListLines)
+
+	for _, commit := range commits {
+		if commit.Hash == "" {
+			continue
+		}
+
+		commitWG.Add(1)
+		go auditDiff(commit, repo, &commitWG, &gitLeakReceiverWG,
+			semaphoreChan, gitLeaksChan)
+
+		if commit.Hash == opts.SinceCommit {
+			break
+		}
+	}
+	go reportAggregator(&gitLeakReceiverWG, gitLeaksChan, &leaks)
+	commitWG.Wait()
+	gitLeakReceiverWG.Wait()
+	// every sender has finished and every leak has been collected, so the
+	// channel can be closed to let reportAggregator return
+	close(gitLeaksChan)
+
+	if len(leaks) != 0 {
+		leaksPst = true
+		log.Printf("\x1b[31;2mLEAKS DETECTED for %s\x1b[0m!\n", repo.name)
+	} else {
+		log.Printf("No Leaks detected for \x1b[32;2m%s\x1b[0m\n", repo.name)
+	}
+
+	if opts.ReportPath != "" && len(leaks) != 0 {
+		if err = repo.writeReport(leaks); err != nil {
+			return leaksPst, fmt.Errorf("could not write report to %s", opts.ReportPath)
+		}
+	}
+	return leaksPst, nil
+}
+
+// writeReport is used by audit and writes leaks as indented JSON to
+// <repo.reportPath>/<repo_name>_leaks.json. The report directory is created
+// when it is missing; with an empty report path the file is written
+// relative to the working directory. Marshalling, directory creation and
+// write failures are returned to the caller.
+func (repo *Repo) writeReport(leaks []Leak) error {
+	reportJSON, err := json.MarshalIndent(leaks, "", "\t")
+	if err != nil {
+		return err
+	}
+	// create the same directory the file is written to; MkdirAll is a
+	// no-op when it already exists
+	if repo.reportPath != "" {
+		if err := os.MkdirAll(repo.reportPath, os.ModePerm); err != nil {
+			return err
+		}
+	}
+	reportFile := filepath.Join(repo.reportPath, fmt.Sprintf("%s_leaks.json", repo.name))
+	if err := ioutil.WriteFile(reportFile, reportJSON, 0644); err != nil {
+		return err
+	}
+	log.Printf("report for %s written to %s", repo.name, reportFile)
+	return nil
+}
+
+// parseRevList is responsible for parsing the output of
+// $ `git rev-list --all --remotes --topo-order --format=%H%n%an%n%s%n%ci`
+// which has already been split into lines. Each commit occupies five lines:
+//		commit SHA   (header line, skipped)
+// 		SHA
+// 		Author Name
+// 		Commit Msg
+// 		Commit Date
+// A trailing group with fewer than five lines is ignored instead of
+// causing an index out of range panic.
+// Used by audit
+func parseRevList(revList [][]byte) []Commit {
+	var commits []Commit
+	// only consume complete five-line records
+	for i := 0; i+4 < len(revList); i += 5 {
+		commits = append(commits, Commit{
+			Hash:   string(revList[i+1]),
+			Author: string(revList[i+2]),
+			Msg:    string(revList[i+3]),
+			Time:   string(revList[i+4]),
+		})
+	}
+	return commits
+}
+
+// reportAggregator is a go func that collects every leak received on
+// gitLeaks into leaks and, in verbose mode, prints each one as indented
+// JSON. It marks each received leak as done on gitLeakReceiverWG and
+// returns once gitLeaks is closed.
+func reportAggregator(gitLeakReceiverWG *sync.WaitGroup, gitLeaks chan Leak, leaks *[]Leak) {
+	for gitLeak := range gitLeaks {
+		*leaks = append(*leaks, gitLeak)
+		if opts.Verbose {
+			b, err := json.MarshalIndent(gitLeak, "", "   ")
+			if err != nil {
+				// the leak is still recorded above; only its
+				// verbose output is lost
+				fmt.Printf("failed to output leak: %v", err)
+			} else {
+				fmt.Println(string(b))
+			}
+		}
+		gitLeakReceiverWG.Done()
+	}
+}
+
+// Used by audit, auditDiff is a go func responsible for diffing and
+// auditing a single commit. It runs `git diff <hash>^!` while holding a
+// slot of semaphoreChan, passes the diff to doChecks and sends every
+// resulting leak on gitLeaks, registering each one on gitLeakReceiverWG
+// before sending. commitWG is marked done when the function returns.
+//
+// The diff runs in the process working directory, which audit has already
+// changed to repo.path before starting these goroutines; changing the
+// process-wide directory again from every goroutine is not needed.
+// A failing diff terminates the process with ExitFailure.
+func auditDiff(currCommit Commit, repo *Repo, commitWG *sync.WaitGroup,
+	gitLeakReceiverWG *sync.WaitGroup, semaphoreChan chan struct{},
+	gitLeaks chan Leak) {
+	// signal to WG this diff is done being audited
+	defer commitWG.Done()
+
+	commitCmp := fmt.Sprintf("%s^!", currCommit.Hash)
+	semaphoreChan <- struct{}{}
+	out, err := exec.Command("git", "diff", commitCmp).Output()
+	<-semaphoreChan
+
+	if err != nil {
+		// TODO handle this better
+		fmt.Fprintf(os.Stderr, "git diff %s failed for %s: %v\n", commitCmp, repo.name, err)
+		os.Exit(ExitFailure)
+	}
+
+	for _, leak := range doChecks(string(out), currCommit, repo) {
+		gitLeakReceiverWG.Add(1)
+		gitLeaks <- leak
+	}
+}

+ 65 - 0
repo_test.go

@@ -0,0 +1,65 @@
+package main
+
+import (
+	"os"
+	"testing"
+)
+
+// TestNewLocalRepo checks the name and path newLocalRepo derives from a
+// repo path.
+func TestNewLocalRepo(t *testing.T) {
+	// newLocalRepo reads opts, so set it up here instead of relying on an
+	// earlier test having done so
+	opts, _ = defaultOptions()
+
+	r := newLocalRepo("")
+	if r.path != "" {
+		t.Errorf("path = %q, want empty", r.path)
+	}
+	r = newLocalRepo("some/path")
+	if r.name != "path" || r.path != "some/path" {
+		t.Errorf("got name %q path %q, want name %q path %q",
+			r.name, r.path, "path", "some/path")
+	}
+}
+
+// TestWriteReport checks that writeReport creates <repo_name>_leaks.json in
+// the working directory when no report path is configured.
+func TestWriteReport(t *testing.T) {
+	opts, _ = defaultOptions()
+	r := newRepo("fakerepo", "github.com", "")
+	r.leaks = []Leak{*sampleLeak(), *sampleLeak()}
+	// remove the report again whether or not the checks below pass
+	defer os.Remove("fakerepo_leaks.json")
+
+	if err := r.writeReport(r.leaks); err != nil {
+		t.Fatalf("writeReport failed: %v", err)
+	}
+	if _, err := os.Stat("fakerepo_leaks.json"); os.IsNotExist(err) {
+		t.Error("fakerepo_leaks.json was not written")
+	}
+}
+
+// TestAudit clones two public repos into temporary directories and audits
+// them: one that is expected to contain leaks and one that is not. It
+// needs network access and git.
+func TestAudit(t *testing.T) {
+	opts, _ = defaultOptions()
+	opts.RepoMode = true
+	opts.Tmp = true
+	opts.URL = "https://github.com/zricethezav/gronit"
+	owner := newOwner()
+	r := newRepo("gronit", opts.URL, owner.path)
+	leaksPst, err := r.audit()
+	if err != nil {
+		t.Fatalf("audit of %s failed: %v", opts.URL, err)
+	}
+	if !leaksPst {
+		// TODO setup actual test repo
+		t.Errorf("expected leaks in %s", opts.URL)
+	}
+
+	// new owner
+	opts.URL = "https://github.com/kelseyhightower/nocode"
+	owner = newOwner()
+	r = newRepo("nocode", opts.URL, owner.path)
+	leaksPst, err = r.audit()
+	if err != nil {
+		// without this check a failed clone would pass as "no leaks"
+		t.Fatalf("audit of %s failed: %v", opts.URL, err)
+	}
+	if leaksPst {
+		t.Errorf("expected no leaks in %s", opts.URL)
+	}
+}
+
+// sampleLeak returns a pointer to a Leak filled with placeholder values for
+// use in tests. File is left at its zero value.
+func sampleLeak() *Leak {
+	leak := Leak{
+		Line:     "yoo",
+		Commit:   "mycommit",
+		Offender: "oh boy",
+		Reason:   "hello",
+		Msg:      "msg",
+		Time:     "time",
+		Author:   "lol",
+		RepoURL:  "yooo",
+	}
+	return &leak
+}