Parcourir la source

[progress] shannon entropy, need to include stop words in target key

zricethezav il y a 8 ans
Parent
commit
4fd6ee3ab3
2 fichiers modifiés avec 60 ajouts et 14 suppressions
  1. 35 10
      checks.go
  2. 25 4
      main.go

+ 35 - 10
checks.go

@@ -1,47 +1,72 @@
 package main
 package main
 
 
 import (
 import (
-	"fmt"
-	"github.com/nbutton23/zxcvbn-go"
+	_ "fmt"
+	//"github.com/nbutton23/zxcvbn-go"
+	"bytes"
+	"math"
 	"strings"
 	"strings"
 )
 )
 
 
 // check each line of a diff and see if there are any potential
 // check each line of a diff and see if there are any potential
 // secrets
 // secrets
-func checkRegex(diff string) ([]string, bool) {
+// https://people.eecs.berkeley.edu/~rohanpadhye/files/key_leaks-msr15.pdf
+func checkRegex(diff string) []string {
 	var match string
 	var match string
 	var results []string
 	var results []string
-	secretsPresent := false
 	lines := strings.Split(diff, "\n")
 	lines := strings.Split(diff, "\n")
 	for _, line := range lines {
 	for _, line := range lines {
-		if len(line) == 0 {
+		// doubtful a leak would be on a line > 120 characters
+		if len(line) == 0 || len(line) > 80 {
 			continue
 			continue
 		}
 		}
-
 		for _, re := range regexes {
 		for _, re := range regexes {
 			match = re.FindString(line)
 			match = re.FindString(line)
 			if len(match) == 0 {
 			if len(match) == 0 {
 				continue
 				continue
 			}
 			}
-			secretsPresent = true
 			results = append(results, line)
 			results = append(results, line)
 		}
 		}
 	}
 	}
-	return results, secretsPresent
+	return results
 }
 }
 
 
 // checkEntropy determines whether target contains enough
 // checkEntropy determines whether target contains enough
 // entropy for a hash
 // entropy for a hash
+// TODO remove stop words:
+// setting(s), config(s), property(s), etc
 func checkEntropy(target string) bool {
 func checkEntropy(target string) bool {
 	index := assignRegex.FindStringIndex(target)
 	index := assignRegex.FindStringIndex(target)
 	if len(index) == 0 {
 	if len(index) == 0 {
 		return false
 		return false
 	}
 	}
+
+	// TODO check for stop words here
+
 	target = strings.Trim(target[index[1]:len(target)], " ")
 	target = strings.Trim(target[index[1]:len(target)], " ")
-	entropy := zxcvbn.PasswordStrength(target, nil).Entropy
+	entropy := shannonEntropy(target)
+
 	// tune this/make option
 	// tune this/make option
-	if entropy > 70 {
+	if entropy > 3.5 {
 		return true
 		return true
 	}
 	}
 	return false
 	return false
 }
 }
+
// shannonEntropy returns the Shannon entropy of target in bits per byte,
// computed from the frequencies of the individual bytes of the string
// (bytes, not runes). An empty string has entropy 0.
func shannonEntropy(target string) float32 {
	// An empty string carries no information; returning early also avoids
	// the 0/0 division below.
	if len(target) == 0 {
		return 0
	}

	data := []byte(target)
	total := float64(len(data))
	entropy := 0.0
	// Visit the byte values in a fixed order so the floating-point sum is
	// reproducible from run to run; ranging over a map would add the terms
	// in a randomized order.
	for b := 0; b < 256; b++ {
		count := bytes.Count(data, []byte{byte(b)})
		if count == 0 {
			continue
		}
		px := float64(count) / total
		entropy -= px * math.Log2(px)
	}
	return float32(entropy)
}

+ 25 - 4
main.go

@@ -28,6 +28,8 @@ var assignRegex *regexp.Regexp
 func init() {
 func init() {
 	appRoot, _ = os.Getwd()
 	appRoot, _ = os.Getwd()
 	cache = make(map[string]bool)
 	cache = make(map[string]bool)
+	// TODO update regex to look for things like:
+	// client("fewafewakwafejwkaf",
 	regexes = map[string]*regexp.Regexp{
 	regexes = map[string]*regexp.Regexp{
 		"github":   regexp.MustCompile(`[g|G][i|I][t|T][h|H][u|U][b|B].*(=|:|:=|<-).*\w+.*`),
 		"github":   regexp.MustCompile(`[g|G][i|I][t|T][h|H][u|U][b|B].*(=|:|:=|<-).*\w+.*`),
 		"aws":      regexp.MustCompile(`[a|A][w|W][s|S].*(=|:=|:|<-).*\w+.*`),
 		"aws":      regexp.MustCompile(`[a|A][w|W][s|S].*(=|:=|:|<-).*\w+.*`),
@@ -84,6 +86,7 @@ func (repo Repo) audit() {
 	var err error
 	var err error
 	var branch string
 	var branch string
 	var commits [][]byte
 	var commits [][]byte
+	var leaks []string
 
 
 	out, err = exec.Command("git", "branch", "--all").Output()
 	out, err = exec.Command("git", "branch", "--all").Output()
 	if err != nil {
 	if err != nil {
@@ -110,20 +113,38 @@ func (repo Repo) audit() {
 
 
 			// TODO need a memoization structure for commitB vs commits[j+1]
 			// TODO need a memoization structure for commitB vs commits[j+1]
 			// memoize the actual diff function
 			// memoize the actual diff function
-			diff(string(commitB), string(commits[j+1]))
+			leaks = checkDiff(string(commitB), string(commits[j+1]))
+			if len(leaks) != 0 {
+				fmt.Println(leaks)
+			}
 		}
 		}
 	}
 	}
 }
 }
 
 
-func diff(commit1 string, commit2 string) {
+func checkDiff(commit1 string, commit2 string) []string {
+	var leakPrs bool
+	var leaks []string
 	_, seen := cache[commit1+commit2]
 	_, seen := cache[commit1+commit2]
 	if seen {
 	if seen {
-		return
+		return []string{}
 	}
 	}
+
 	out, err := exec.Command("git", "diff", commit1, commit2).Output()
 	out, err := exec.Command("git", "diff", commit1, commit2).Output()
 	if err != nil {
 	if err != nil {
 		log.Fatalf("error retrieving commits %v\n", err)
 		log.Fatalf("error retrieving commits %v\n", err)
 	}
 	}
+
 	cache[commit1+commit2] = true
 	cache[commit1+commit2] = true
-	fmt.Println(checkRegex(string(out)))
+	lines := checkRegex(string(out))
+	if len(lines) == 0 {
+		return []string{}
+	}
+
+	for _, line := range lines {
+		leakPrs = checkEntropy(line)
+		if leakPrs {
+			leaks = append(leaks, line)
+		}
+	}
+	return leaks
 }
 }