checks.go 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. package main
  2. import (
  3. _ "fmt"
  4. "math"
  5. "strings"
  6. )
  7. // TODO LOCAL REPO!!!!
  8. // checks Regex and if enabled, entropy and stopwords
  9. func doChecks(diff string, commit Commit, opts *Options, repo RepoDesc) []LeakElem {
  10. var (
  11. match string
  12. leaks []LeakElem
  13. leak LeakElem
  14. )
  15. lines := strings.Split(diff, "\n")
  16. file := "unable to determine file"
  17. for _, line := range lines {
  18. if strings.Contains(line, "diff --git a") {
  19. idx := fileDiffRegex.FindStringIndex(line)
  20. if len(idx) == 2 {
  21. file = line[idx[1]:]
  22. }
  23. }
  24. for leakType, re := range regexes {
  25. match = re.FindString(line)
  26. if len(match) == 0 ||
  27. (opts.Strict && containsStopWords(line)) ||
  28. (opts.Entropy && !checkShannonEntropy(line, opts)) {
  29. continue
  30. }
  31. leak = LeakElem{
  32. Line: line,
  33. Commit: commit.Hash,
  34. Offender: match,
  35. Reason: leakType,
  36. Msg: commit.Msg,
  37. Time: commit.Time,
  38. Author: commit.Author,
  39. File: file,
  40. RepoURL: repo.url,
  41. }
  42. leaks = append(leaks, leak)
  43. }
  44. }
  45. return leaks
  46. }
  47. // checkShannonEntropy checks entropy of target
  48. func checkShannonEntropy(target string, opts *Options) bool {
  49. var (
  50. sum float64
  51. targetBase64Len int
  52. targetHexLen int
  53. base64Freq = make(map[rune]float64)
  54. hexFreq = make(map[rune]float64)
  55. bits int
  56. )
  57. index := assignRegex.FindStringIndex(target)
  58. if len(index) == 0 {
  59. return false
  60. }
  61. target = strings.Trim(target[index[1]:], " ")
  62. if len(target) > 100 {
  63. return false
  64. }
  65. // base64Shannon
  66. for _, i := range target {
  67. if strings.Contains(base64Chars, string(i)) {
  68. base64Freq[i]++
  69. targetBase64Len++
  70. }
  71. }
  72. for _, v := range base64Freq {
  73. f := v / float64(targetBase64Len)
  74. sum += f * math.Log2(f)
  75. }
  76. bits = int(math.Ceil(sum*-1)) * targetBase64Len
  77. if bits > opts.B64EntropyCutoff {
  78. return true
  79. }
  80. // hexShannon
  81. sum = 0
  82. for _, i := range target {
  83. if strings.Contains(hexChars, string(i)) {
  84. hexFreq[i]++
  85. targetHexLen++
  86. }
  87. }
  88. for _, v := range hexFreq {
  89. f := v / float64(targetHexLen)
  90. sum += f * math.Log2(f)
  91. }
  92. bits = int(math.Ceil(sum*-1)) * targetHexLen
  93. return bits > opts.HexEntropyCutoff
  94. }
  95. // containsStopWords checks if there are any stop words in target
  96. func containsStopWords(target string) bool {
  97. // Convert to lowercase to reduce the number of loops needed.
  98. target = strings.ToLower(target)
  99. for _, stopWord := range stopWords {
  100. if strings.Contains(target, stopWord) {
  101. return true
  102. }
  103. }
  104. return false
  105. }