common.go 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. package sources
  2. import (
  3. "bufio"
  4. "bytes"
  5. "context"
  6. "io"
  7. "path/filepath"
  8. "runtime"
  9. "github.com/mholt/archives"
  10. "github.com/zricethezav/gitleaks/v8/config"
  11. "github.com/zricethezav/gitleaks/v8/logging"
  12. )
  13. const maxPeekSize = 25 * 1_000 // 10kb
  14. var isWhitespace [256]bool
  15. var isWindows = runtime.GOOS == "windows"
  16. func init() {
  17. // define whitespace characters
  18. isWhitespace[' '] = true
  19. isWhitespace['\t'] = true
  20. isWhitespace['\n'] = true
  21. isWhitespace['\r'] = true
  22. }
  23. // isArchive does a light check to see if the provided path is an archive or
  24. // compressed file. The File source already does this, so this exists mainly
  25. // to avoid expensive calls before sending things to the File source
  26. func isArchive(ctx context.Context, path string) bool {
  27. format, _, err := archives.Identify(ctx, path, nil)
  28. return err == nil && format != nil
  29. }
  30. // shouldSkipPath checks a path against all the allowlists to see if it can
  31. // be skipped
  32. func shouldSkipPath(cfg *config.Config, path string) bool {
  33. if cfg == nil {
  34. logging.Trace().Str("path", path).Msg("not skipping path because config is nil")
  35. return false
  36. }
  37. for _, a := range cfg.Allowlists {
  38. if a.PathAllowed(path) ||
  39. // TODO: Remove this in v9.
  40. // This is an awkward hack to mitigate https://github.com/gitleaks/gitleaks/issues/1641.
  41. (isWindows && a.PathAllowed(filepath.ToSlash(path))) {
  42. return true
  43. }
  44. }
  45. return false
  46. }
  47. // readUntilSafeBoundary consumes |f| until it finds two consecutive `\n` characters, up to |maxPeekSize|.
  48. // This hopefully avoids splitting. (https://github.com/gitleaks/gitleaks/issues/1651)
  49. func readUntilSafeBoundary(r *bufio.Reader, n int, maxPeekSize int, peekBuf *bytes.Buffer) error {
  50. if peekBuf.Len() == 0 {
  51. return nil
  52. }
  53. // Does the buffer end in consecutive newlines?
  54. var (
  55. data = peekBuf.Bytes()
  56. lastChar = data[len(data)-1]
  57. newlineCount = 0 // Tracks consecutive newlines
  58. )
  59. if isWhitespace[lastChar] {
  60. for i := len(data) - 1; i >= 0; i-- {
  61. lastChar = data[i]
  62. if lastChar == '\n' {
  63. newlineCount++
  64. // Stop if two consecutive newlines are found
  65. if newlineCount >= 2 {
  66. return nil
  67. }
  68. } else if isWhitespace[lastChar] {
  69. // The presence of other whitespace characters (`\r`, ` `, `\t`) shouldn't reset the count.
  70. // (Intentionally do nothing.)
  71. } else {
  72. break
  73. }
  74. }
  75. }
  76. // If not, read ahead until we (hopefully) find some.
  77. newlineCount = 0
  78. for {
  79. data = peekBuf.Bytes()
  80. // Check if the last character is a newline.
  81. lastChar = data[len(data)-1]
  82. if lastChar == '\n' {
  83. newlineCount++
  84. // Stop if two consecutive newlines are found
  85. if newlineCount >= 2 {
  86. break
  87. }
  88. } else if isWhitespace[lastChar] {
  89. // The presence of other whitespace characters (`\r`, ` `, `\t`) shouldn't reset the count.
  90. // (Intentionally do nothing.)
  91. } else {
  92. newlineCount = 0 // Reset if a non-newline character is found
  93. }
  94. // Stop growing the buffer if it reaches maxSize
  95. if (peekBuf.Len() - n) >= maxPeekSize {
  96. break
  97. }
  98. // Read additional data into a temporary buffer
  99. b, err := r.ReadByte()
  100. if err != nil {
  101. if err == io.EOF {
  102. break
  103. }
  104. return err
  105. }
  106. peekBuf.WriteByte(b)
  107. }
  108. return nil
  109. }