checks.go 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. package main
  2. import (
  3. _ "fmt"
  4. "math"
  5. "strings"
  6. )
  7. // checks Regex and if enabled, entropy and stopwords
  8. func doChecks(diff string, commit Commit, repo *Repo) []Leak {
  9. var (
  10. match string
  11. leaks []Leak
  12. leak Leak
  13. )
  14. lines := strings.Split(diff, "\n")
  15. file := "unable to determine file"
  16. for _, line := range lines {
  17. if strings.Contains(line, "diff --git a") {
  18. idx := fileDiffRegex.FindStringIndex(line)
  19. if len(idx) == 2 {
  20. file = line[idx[1]:]
  21. }
  22. }
  23. for leakType, re := range regexes {
  24. match = re.FindString(line)
  25. if len(match) == 0 ||
  26. (opts.Strict && containsStopWords(line)) ||
  27. (opts.Entropy && !checkShannonEntropy(line, opts)) {
  28. continue
  29. }
  30. leak = Leak{
  31. Line: line,
  32. Commit: commit.Hash,
  33. Offender: match,
  34. Reason: leakType,
  35. Msg: commit.Msg,
  36. Time: commit.Time,
  37. Author: commit.Author,
  38. File: file,
  39. RepoURL: repo.url,
  40. }
  41. leaks = append(leaks, leak)
  42. }
  43. }
  44. return leaks
  45. }
  46. // checkShannonEntropy checks entropy of target
  47. func checkShannonEntropy(target string, opts *Options) bool {
  48. var (
  49. sum float64
  50. targetBase64Len int
  51. targetHexLen int
  52. base64Freq = make(map[rune]float64)
  53. hexFreq = make(map[rune]float64)
  54. bits int
  55. )
  56. index := assignRegex.FindStringIndex(target)
  57. if len(index) == 0 {
  58. return false
  59. }
  60. target = strings.Trim(target[index[1]:], " ")
  61. if len(target) > 100 {
  62. return false
  63. }
  64. // base64Shannon
  65. for _, i := range target {
  66. if strings.Contains(base64Chars, string(i)) {
  67. base64Freq[i]++
  68. targetBase64Len++
  69. }
  70. }
  71. for _, v := range base64Freq {
  72. f := v / float64(targetBase64Len)
  73. sum += f * math.Log2(f)
  74. }
  75. bits = int(math.Ceil(sum*-1)) * targetBase64Len
  76. if bits > opts.B64EntropyCutoff {
  77. return true
  78. }
  79. // hexShannon
  80. sum = 0
  81. for _, i := range target {
  82. if strings.Contains(hexChars, string(i)) {
  83. hexFreq[i]++
  84. targetHexLen++
  85. }
  86. }
  87. for _, v := range hexFreq {
  88. f := v / float64(targetHexLen)
  89. sum += f * math.Log2(f)
  90. }
  91. bits = int(math.Ceil(sum*-1)) * targetHexLen
  92. return bits > opts.HexEntropyCutoff
  93. }
  94. // containsStopWords checks if there are any stop words in target
  95. func containsStopWords(target string) bool {
  96. // Convert to lowercase to reduce the number of loops needed.
  97. target = strings.ToLower(target)
  98. for _, stopWord := range stopWords {
  99. if strings.Contains(target, stopWord) {
  100. return true
  101. }
  102. }
  103. return false
  104. }