checks.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. package main
  2. import (
  3. "math"
  4. "strings"
  5. )
  6. // checks Regex and if enabled, entropy and stopwords
  7. func doChecks(diff string, commit Commit, repo *Repo) []Leak {
  8. var (
  9. match string
  10. leaks []Leak
  11. leak Leak
  12. )
  13. lines := strings.Split(diff, "\n")
  14. file := "unable to determine file"
  15. for _, line := range lines {
  16. if strings.Contains(line, "diff --git a") {
  17. idx := fileDiffRegex.FindStringIndex(line)
  18. if len(idx) == 2 {
  19. file = line[idx[1]:]
  20. }
  21. }
  22. for leakType, re := range regexes {
  23. match = re.FindString(line)
  24. if len(match) == 0 ||
  25. (opts.Strict && containsStopWords(line)) ||
  26. (opts.Entropy && !checkShannonEntropy(line, opts)) {
  27. continue
  28. }
  29. leak = Leak{
  30. Line: line,
  31. Commit: commit.Hash,
  32. Offender: match,
  33. Reason: leakType,
  34. Msg: commit.Msg,
  35. Time: commit.Time,
  36. Author: commit.Author,
  37. File: file,
  38. RepoURL: repo.url,
  39. }
  40. leaks = append(leaks, leak)
  41. }
  42. // Check for external regex matches
  43. if externalRegex != nil {
  44. for _, re := range externalRegex {
  45. match = re.FindString(line)
  46. if len(match) == 0 ||
  47. (opts.Strict && containsStopWords(line)) ||
  48. (opts.Entropy && !checkShannonEntropy(line, opts)) {
  49. continue
  50. }
  51. leak = Leak{
  52. Line: line,
  53. Commit: commit.Hash,
  54. Offender: match,
  55. Reason: "match: " + re.String(),
  56. Msg: commit.Msg,
  57. Time: commit.Time,
  58. Author: commit.Author,
  59. File: file,
  60. RepoURL: repo.url,
  61. }
  62. leaks = append(leaks, leak)
  63. }
  64. }
  65. }
  66. return leaks
  67. }
  68. // checkShannonEntropy checks entropy of target
  69. func checkShannonEntropy(target string, opts *Options) bool {
  70. var (
  71. sum float64
  72. targetBase64Len int
  73. targetHexLen int
  74. base64Freq = make(map[rune]float64)
  75. hexFreq = make(map[rune]float64)
  76. bits int
  77. )
  78. index := assignRegex.FindStringIndex(target)
  79. if len(index) == 0 {
  80. return false
  81. }
  82. target = strings.Trim(target[index[1]:], " ")
  83. if len(target) > 100 {
  84. return false
  85. }
  86. // base64Shannon
  87. for _, i := range target {
  88. if strings.Contains(base64Chars, string(i)) {
  89. base64Freq[i]++
  90. targetBase64Len++
  91. }
  92. }
  93. for _, v := range base64Freq {
  94. f := v / float64(targetBase64Len)
  95. sum += f * math.Log2(f)
  96. }
  97. bits = int(math.Ceil(sum*-1)) * targetBase64Len
  98. if bits > opts.B64EntropyCutoff {
  99. return true
  100. }
  101. // hexShannon
  102. sum = 0
  103. for _, i := range target {
  104. if strings.Contains(hexChars, string(i)) {
  105. hexFreq[i]++
  106. targetHexLen++
  107. }
  108. }
  109. for _, v := range hexFreq {
  110. f := v / float64(targetHexLen)
  111. sum += f * math.Log2(f)
  112. }
  113. bits = int(math.Ceil(sum*-1)) * targetHexLen
  114. return bits > opts.HexEntropyCutoff
  115. }
  116. // containsStopWords checks if there are any stop words in target
  117. func containsStopWords(target string) bool {
  118. // Convert to lowercase to reduce the number of loops needed.
  119. target = strings.ToLower(target)
  120. for _, stopWord := range stopWords {
  121. if strings.Contains(target, stopWord) {
  122. return true
  123. }
  124. }
  125. return false
  126. }