// checks.go

package main

import (
	"math"
	"strings"
)
  6. // checks Regex and if enabled, entropy and stopwords
  7. func doChecks(diff string, commit Commit, repo *Repo) []Leak {
  8. var (
  9. match string
  10. leaks []Leak
  11. leak Leak
  12. )
  13. lines := strings.Split(diff, "\n")
  14. file := "unable to determine file"
  15. for _, line := range lines {
  16. if strings.Contains(line, "diff --git a") {
  17. idx := fileDiffRegex.FindStringIndex(line)
  18. if len(idx) == 2 {
  19. file = line[idx[1]:]
  20. }
  21. }
  22. for leakType, re := range regexes {
  23. match = re.FindString(line)
  24. if len(match) == 0 ||
  25. (opts.Strict && containsStopWords(line)) ||
  26. (opts.Entropy && !checkShannonEntropy(line, opts)) {
  27. continue
  28. }
  29. leak = Leak{
  30. Line: line,
  31. Commit: commit.Hash,
  32. Offender: match,
  33. Reason: leakType,
  34. Msg: commit.Msg,
  35. Time: commit.Time,
  36. Author: commit.Author,
  37. File: file,
  38. RepoURL: repo.url,
  39. }
  40. leaks = append(leaks, leak)
  41. }
  42. }
  43. return leaks
  44. }
  45. // checkShannonEntropy checks entropy of target
  46. func checkShannonEntropy(target string, opts *Options) bool {
  47. var (
  48. sum float64
  49. targetBase64Len int
  50. targetHexLen int
  51. base64Freq = make(map[rune]float64)
  52. hexFreq = make(map[rune]float64)
  53. bits int
  54. )
  55. index := assignRegex.FindStringIndex(target)
  56. if len(index) == 0 {
  57. return false
  58. }
  59. target = strings.Trim(target[index[1]:], " ")
  60. if len(target) > 100 {
  61. return false
  62. }
  63. // base64Shannon
  64. for _, i := range target {
  65. if strings.Contains(base64Chars, string(i)) {
  66. base64Freq[i]++
  67. targetBase64Len++
  68. }
  69. }
  70. for _, v := range base64Freq {
  71. f := v / float64(targetBase64Len)
  72. sum += f * math.Log2(f)
  73. }
  74. bits = int(math.Ceil(sum*-1)) * targetBase64Len
  75. if bits > opts.B64EntropyCutoff {
  76. return true
  77. }
  78. // hexShannon
  79. sum = 0
  80. for _, i := range target {
  81. if strings.Contains(hexChars, string(i)) {
  82. hexFreq[i]++
  83. targetHexLen++
  84. }
  85. }
  86. for _, v := range hexFreq {
  87. f := v / float64(targetHexLen)
  88. sum += f * math.Log2(f)
  89. }
  90. bits = int(math.Ceil(sum*-1)) * targetHexLen
  91. return bits > opts.HexEntropyCutoff
  92. }
  93. // containsStopWords checks if there are any stop words in target
  94. func containsStopWords(target string) bool {
  95. // Convert to lowercase to reduce the number of loops needed.
  96. target = strings.ToLower(target)
  97. for _, stopWord := range stopWords {
  98. if strings.Contains(target, stopWord) {
  99. return true
  100. }
  101. }
  102. return false
  103. }