Просмотр исходного кода

fixing a bug, adding csv report option and leak caching

zach rice 6 лет назад
Родитель
Commit
3211cfbbb1
Изменено файлов: 5 (добавлено строк: 267, удалено строк: 55)
  1. 7 7
      Makefile
  2. 14 0
      audit/util.go
  3. 169 0
      examples/leaky-repo.toml
  4. 55 27
      manager/manager.go
  5. 22 21
      options/options.go

+ 7 - 7
Makefile

@@ -27,13 +27,13 @@ build:
 release-builds:
 	rm -rf build
 	mkdir build
-	env GOOS="windows" GOARCH="amd64" go build -o "build/gitleaks-windows-amd64.exe"
-	env GOOS="windows" GOARCH="386" go build -o "build/gitleaks-windows-386.exe"
-	env GOOS="linux" GOARCH="amd64" go build -o "build/gitleaks-linux-amd64"
-	env GOOS="linux" GOARCH="arm" go build -o "build/gitleaks-linux-arm"
-	env GOOS="linux" GOARCH="mips" go build -o "build/gitleaks-linux-mips"
-	env GOOS="linux" GOARCH="mips" go build -o "build/gitleaks-linux-mips"
-	env GOOS="darwin" GOARCH="amd64" go build -o "build/gitleaks-darwin-amd64"
+	env GOOS="windows" GOARCH="amd64" go build -o "build/gitleaks-windows-amd64.exe" $(LDFLAGS)
+	env GOOS="windows" GOARCH="386" go build -o "build/gitleaks-windows-386.exe" $(LDFLAGS)
+	env GOOS="linux" GOARCH="amd64" go build -o "build/gitleaks-linux-amd64" $(LDFLAGS)
+	env GOOS="linux" GOARCH="arm" go build -o "build/gitleaks-linux-arm" $(LDFLAGS)
+	env GOOS="linux" GOARCH="mips" go build -o "build/gitleaks-linux-mips" $(LDFLAGS)
+	env GOOS="linux" GOARCH="mips" go build -o "build/gitleaks-linux-mips" $(LDFLAGS)
+	env GOOS="darwin" GOARCH="amd64" go build -o "build/gitleaks-darwin-amd64" $(LDFLAGS)
 
 deploy:
 	@echo "$(DOCKER_PASSWORD)" | docker login -u "$(DOCKER_USERNAME)" --password-stdin

+ 14 - 0
audit/util.go

@@ -270,6 +270,20 @@ func inspectCommit(c *object.Commit, repo *Repo) error {
 			log.Debugf("whitelisted file found, skipping audit of file: %s", f.Name)
 			return nil
 		}
+
+		if fileMatched(f.Name, repo.config.FileRegex) {
+			repo.Manager.SendLeaks(manager.Leak{
+				Line:     "N/A",
+				Offender: f.Name,
+				Commit:   c.Hash.String(),
+				Repo:     repo.Name,
+				Rule:     "file regex matched" + repo.config.FileRegex.String(),
+				Author:   c.Author.Name,
+				Email:    c.Author.Email,
+				Date:     c.Author.When,
+				File:     f.Name,
+			})
+		}
 		content, err := f.Contents()
 		if err != nil {
 			return err

+ 169 - 0
examples/leaky-repo.toml

@@ -0,0 +1,169 @@
+title = "gitleaks config"
+
+[[rules]]
+	description = "AWS Manager ID"
+	regex = '''(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'''
+	tags = ["key", "AWS"]
+
+[[rules]]
+	description = "AWS cred file info"
+	regex = '''(?i)(aws_access_key_id|aws_secret_access_key)(.{0,20})?=.[0-9a-zA-Z\/+]{20,40}'''
+	tags = ["AWS"]
+
+[[rules]]
+	description = "AWS Secret Key"
+	regex = '''(?i)aws(.{0,20})?(?-i)['\"][0-9a-zA-Z\/+]{40}['\"]'''
+	tags = ["key", "AWS"]
+
+[[rules]]
+	description = "AWS MWS key"
+	regex = '''amzn\.mws\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}'''
+	tags = ["key", "AWS", "MWS"]
+
+[[rules]]
+	description = "Facebook Secret Key"
+	regex = '''(?i)(facebook|fb)(.{0,20})?(?-i)['\"][0-9a-f]{32}['\"]'''
+	tags = ["key", "Facebook"]
+
+[[rules]]
+	description = "Facebook Client ID"
+	regex = '''(?i)(facebook|fb)(.{0,20})?['\"][0-9]{13,17}['\"]'''
+	tags = ["key", "Facebook"]
+
+[[rules]]
+	description = "Twitter Secret Key"
+	regex = '''(?i)twitter(.{0,20})?['\"][0-9a-z]{35,44}['\"]'''
+	tags = ["key", "Twitter"]
+
+[[rules]]
+	description = "Twitter Client ID"
+	regex = '''(?i)twitter(.{0,20})?['\"][0-9a-z]{18,25}['\"]'''
+	tags = ["client", "Twitter"]
+
+[[rules]]
+	description = "Github"
+	regex = '''(?i)github(.{0,20})?(?-i)['\"][0-9a-zA-Z]{35,40}['\"]'''
+	tags = ["key", "Github"]
+
+[[rules]]
+	description = "LinkedIn Client ID"
+	regex = '''(?i)linkedin(.{0,20})?(?-i)['\"][0-9a-z]{12}['\"]'''
+	tags = ["client", "LinkedIn"]
+
+[[rules]]
+	description = "LinkedIn Secret Key"
+	regex = '''(?i)linkedin(.{0,20})?['\"][0-9a-z]{16}['\"]'''
+	tags = ["secret", "LinkedIn"]
+
+[[rules]]
+	description = "Slack"
+	regex = '''xox[baprs]-([0-9a-zA-Z]{10,48})?'''
+	tags = ["key", "Slack"]
+
+[[rules]]
+	description = "EC"
+	regex = '''-----BEGIN EC PRIVATE KEY-----'''
+	tags = ["key", "EC"]
+
+
+[[rules]]
+	description = "Google API key"
+	regex = '''AIza[0-9A-Za-z\\-_]{35}'''
+	tags = ["key", "Google"]
+
+
+[[rules]]
+	description = "Heroku API key"
+	regex = '''(?i)heroku(.{0,20})?['"][0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}['"]'''
+	tags = ["key", "Heroku"]
+
+[[rules]]
+	description = "MailChimp API key"
+	regex = '''(?i)(mailchimp|mc)(.{0,20})?['"][0-9a-f]{32}-us[0-9]{1,2}['"]'''
+	tags = ["key", "Mailchimp"]
+
+[[rules]]
+	description = "Mailgun API key"
+	regex = '''(?i)(mailgun|mg)(.{0,20})?['"][0-9a-z]{32}['"]'''
+	tags = ["key", "Mailgun"]
+
+[[rules]]
+	description = "PayPal Braintree access token"
+	regex = '''access_token\$production\$[0-9a-z]{16}\$[0-9a-f]{32}'''
+	tags = ["key", "Paypal"]
+
+[[rules]]
+	description = "Picatic API key"
+	regex = '''sk_live_[0-9a-z]{32}'''
+	tags = ["key", "Picatic"]
+
+[[rules]]
+	description = "Slack Webhook"
+	regex = '''https://hooks.slack.com/services/T[a-zA-Z0-9_]{8}/B[a-zA-Z0-9_]{8}/[a-zA-Z0-9_]{24}'''
+	tags = ["key", "slack"]
+
+[[rules]]
+	description = "Stripe API key"
+	regex = '''(?i)stripe(.{0,20})?['\"][sk|rk]_live_[0-9a-zA-Z]{24}'''
+	tags = ["key", "Stripe"]
+
+[[rules]]
+	description = "Square access token"
+	regex = '''sq0atp-[0-9A-Za-z\-_]{22}'''
+	tags = ["key", "square"]
+
+[[rules]]
+	description = "Square OAuth secret"
+	regex = '''sq0csp-[0-9A-Za-z\\-_]{43}'''
+	tags = ["key", "square"]
+
+[[rules]]
+	description = "Twilio API key"
+	regex = '''(?i)twilio(.{0,20})?['\"][0-9a-f]{32}['\"]'''
+	tags = ["key", "twilio"]
+
+[[rules]]
+	description = "Env Var"
+	regex = '''(?i)(api_key|apikey|secret|key|api|password|pw|host)=[0-9a-zA-Z-_{}]{4,120}'''
+
+[[rules]]
+	description = "Port"
+	regex = '''(?i)port(.{0,4})?[0-9]{1,10}'''
+
+[[rules]]
+	description = "Email"
+	regex = '''[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}'''
+	tags = ["email"]
+
+[[rules]]
+	description = "Generic Credential"
+	regex = '''(?i)(dbpasswd|dbuser|dbname|dbhost|api_key|apikey|secret|key|api|password|user|guid|hostname|pw|auth)(.{0,20})?['|"][0-9a-zA-Z-_!{}/=]{4,120}['|"]'''
+	tags = ["key", "API", "generic"]
+
+[[rules]]
+	description = "WP-Config"
+	regex='''define(.{0,20})?(DB_CHARSET|NONCE_SALT|LOGGED_IN_SALT|AUTH_SALT|NONCE_KEY|DB_HOST|DB_PASSWORD|AUTH_KEY|SECURE_AUTH_KEY|LOGGED_IN_KEY|DB_NAME|DB_USER)(.{0,20})?['|"].{10,120}['|"]'''
+	tags = ["key", "API", "generic"]
+
+
+[[rules]]
+	description = "Pure Entropy"
+	regex = '''['|"][0-9a-zA-Z-._{}$\/=]{40,120}['|"]'''
+	entropies = [
+        "5.0-5.6"
+	]
+
+[[rules]]
+	description = "Entropy plus Generic Credential"
+	regex = '''(?i)(api_key|apikey|secret|key|api|password|pw)'''
+	entropies = [
+        "5.2-5.5"
+	]
+
+[Global]
+    file = '''(?i)(id_rsa|passwd|id_rsa.pub|pgpass|pem|key|shadow)'''
+
+[whitelist]
+	description = "image whitelists"
+	file = '''(.*?)(jpg|gif|doc|pdf|bin)$'''
+

+ 55 - 27
manager/manager.go

@@ -1,6 +1,9 @@
 package manager
 
 import (
+	"crypto/sha1"
+	"encoding/csv"
+	"encoding/hex"
 	"encoding/json"
 	"fmt"
 	"github.com/hako/durafmt"
@@ -26,9 +29,10 @@ type Manager struct {
 	CloneOptions *git.CloneOptions
 	CloneDir     string
 
-	leaks    []Leak
-	leakChan chan Leak
-	leakWG   *sync.WaitGroup
+	leaks     []Leak
+	leakChan  chan Leak
+	leakWG    *sync.WaitGroup
+	leakCache map[string]bool
 
 	stopChan chan os.Signal
 	metadata Metadata
@@ -38,17 +42,18 @@ type Manager struct {
 // Leak is a struct that contains information about some line of code that contains
 // sensitive information as determined by the rules set in a gitleaks config
 type Leak struct {
-	Line     string    `json:"line"`
-	Offender string    `json:"offender"`
-	Commit   string    `json:"commit"`
-	Repo     string    `json:"repo"`
-	Rule     string    `json:"rule"`
-	Message  string    `json:"commitMessage"`
-	Author   string    `json:"author"`
-	Email    string    `json:"email"`
-	File     string    `json:"file"`
-	Date     time.Time `json:"date"`
-	Tags     string    `json:"tags"`
+	Line       string    `json:"line"`
+	Offender   string    `json:"offender"`
+	Commit     string    `json:"commit"`
+	Repo       string    `json:"repo"`
+	Rule       string    `json:"rule"`
+	Message    string    `json:"commitMessage"`
+	Author     string    `json:"author"`
+	Email      string    `json:"email"`
+	File       string    `json:"file"`
+	Date       time.Time `json:"date"`
+	Tags       string    `json:"tags"`
+	lookupHash string
 }
 
 // AuditTime is a type used to determine total audit time
@@ -104,15 +109,30 @@ func (manager *Manager) GetLeaks() []Leak {
 // SendLeaks accepts a leak and is used by the audit pkg. This is the public function
 // that allows other packages to send leaks to the manager.
 func (manager *Manager) SendLeaks(l Leak) {
+	h := sha1.New()
+	h.Write([]byte(l.Commit + l.Offender + l.File))
+	l.lookupHash = hex.EncodeToString(h.Sum(nil))
 	manager.leakWG.Add(1)
 	manager.leakChan <- l
 }
 
+func (manager *Manager) alreadySeen(leak Leak) bool {
+	if _, ok := manager.leakCache[leak.lookupHash]; ok {
+		return true
+	}
+	manager.leakCache[leak.lookupHash] = true
+	return false
+}
+
 // receiveLeaks listens to leakChan for incoming leaks. If any are received, they are appended to the
 // manager's leaks for future reporting. If the -v/--verbose option is set the leaks will marshaled into
 // json and printed out.
 func (manager *Manager) receiveLeaks() {
 	for leak := range manager.leakChan {
+		if manager.alreadySeen(leak) {
+			manager.leakWG.Done()
+			continue
+		}
 		manager.leaks = append(manager.leaks, leak)
 		if manager.Opts.Verbose {
 			var b []byte
@@ -178,10 +198,11 @@ func NewManager(opts options.Options, cfg config.Config) (*Manager, error) {
 		Config:       cfg,
 		CloneOptions: cloneOpts,
 
-		stopChan: make(chan os.Signal, 1),
-		leakChan: make(chan Leak),
-		leakWG:   &sync.WaitGroup{},
-		metaWG:   &sync.WaitGroup{},
+		stopChan:  make(chan os.Signal, 1),
+		leakChan:  make(chan Leak),
+		leakWG:    &sync.WaitGroup{},
+		leakCache: make(map[string]bool),
+		metaWG:    &sync.WaitGroup{},
 		metadata: Metadata{
 			RegexTime: make(map[string]int64),
 			timings:   make(chan interface{}),
@@ -241,16 +262,23 @@ func (manager *Manager) Report() error {
 			return err
 		}
 
-		encoder := json.NewEncoder(file)
-		encoder.SetIndent("", " ")
-		err = encoder.Encode(manager.leaks)
-		if err != nil {
-			return err
-		}
-		err = file.Close()
-		if err != nil {
-			return err
+		if manager.Opts.ReportFormat == "json" {
+			encoder := json.NewEncoder(file)
+			encoder.SetIndent("", " ")
+			err = encoder.Encode(manager.leaks)
+			if err != nil {
+				return err
+			}
+		} else {
+			w := csv.NewWriter(file)
+			w.Write([]string{"repo", "line", "commit", "offender", "rule", "tags", "commitMsg", "author", "email", "file", "date"})
+			for _, leak := range manager.GetLeaks() {
+				w.Write([]string{leak.Repo, leak.Line, leak.Commit, leak.Offender, leak.Rule, leak.Tags, leak.Message, leak.Author, leak.Email, leak.File, leak.Date.Format(time.RFC3339)})
+			}
+			w.Flush()
 		}
+		file.Close()
+
 		log.Infof("report written to %s", manager.Opts.Report)
 	}
 	return nil

+ 22 - 21
options/options.go

@@ -25,27 +25,28 @@ const (
 
 // Options stores values of command line options
 type Options struct {
-	Verbose     bool   `short:"v" long:"verbose" description:"Show verbose output from audit"`
-	Repo        string `short:"r" long:"repo" description:"Target repository"`
-	Config      string `long:"config" description:"config path"`
-	Disk        bool   `long:"disk" description:"Clones repo(s) to disk"`
-	Version     bool   `long:"version" description:"version number"`
-	Timeout     int    `long:"timeout" description:"Timeout (s)"`
-	Username    string `long:"username" description:"Username for git repo"`
-	Password    string `long:"password" description:"Password for git repo"`
-	AccessToken string `long:"access-token" description:"Access token for git repo"`
-	Commit      string `long:"commit" description:"sha of commit to audit"`
-	Threads     int    `long:"threads" description:"Maximum number of threads gitleaks spawns"`
-	SSH         string `long:"ssh-key" description:"path to ssh key used for auth"`
-	Uncommited  bool   `long:"uncommitted" description:"run gitleaks on uncommitted code"`
-	RepoPath    string `long:"repo-path" description:"Path to repo"`
-	OwnerPath   string `long:"owner-path" description:"Path to owner directory (repos discovered)"`
-	Branch      string `long:"branch" description:"Branch to audit"`
-	Report      string `long:"report" description:"path to write json leaks file"`
-	Redact      bool   `long:"redact" description:"redact secrets from log messages and leaks"`
-	Debug       bool   `long:"debug" description:"log debug messages"`
-	RepoConfig  bool   `long:"repo-config" description:"Load config from target repo. Config file must be \".gitleaks.toml\" or \"gitleaks.toml\""`
-	PrettyPrint bool   `long:"pretty" description:"Pretty print json if leaks are present"`
+	Verbose      bool   `short:"v" long:"verbose" description:"Show verbose output from audit"`
+	Repo         string `short:"r" long:"repo" description:"Target repository"`
+	Config       string `long:"config" description:"config path"`
+	Disk         bool   `long:"disk" description:"Clones repo(s) to disk"`
+	Version      bool   `long:"version" description:"version number"`
+	Timeout      int    `long:"timeout" description:"Timeout (s)"`
+	Username     string `long:"username" description:"Username for git repo"`
+	Password     string `long:"password" description:"Password for git repo"`
+	AccessToken  string `long:"access-token" description:"Access token for git repo"`
+	Commit       string `long:"commit" description:"sha of commit to audit"`
+	Threads      int    `long:"threads" description:"Maximum number of threads gitleaks spawns"`
+	SSH          string `long:"ssh-key" description:"path to ssh key used for auth"`
+	Uncommited   bool   `long:"uncommitted" description:"run gitleaks on uncommitted code"`
+	RepoPath     string `long:"repo-path" description:"Path to repo"`
+	OwnerPath    string `long:"owner-path" description:"Path to owner directory (repos discovered)"`
+	Branch       string `long:"branch" description:"Branch to audit"`
+	Report       string `long:"report" description:"path to write json leaks file"`
+	ReportFormat string `long:"report-format" default:"json" description:"json or csv"`
+	Redact       bool   `long:"redact" description:"redact secrets from log messages and leaks"`
+	Debug        bool   `long:"debug" description:"log debug messages"`
+	RepoConfig   bool   `long:"repo-config" description:"Load config from target repo. Config file must be \".gitleaks.toml\" or \"gitleaks.toml\""`
+	PrettyPrint  bool   `long:"pretty" description:"Pretty print json if leaks are present"`
 
 	// Hosts
 	Host         string `long:"host" description:"git hosting service like gitlab or github. Supported hosts include: Github, Gitlab"`