| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125 |
- package sources
- import (
- "bufio"
- "bytes"
- "context"
- "io"
- "path/filepath"
- "runtime"
- "github.com/mholt/archives"
- "github.com/zricethezav/gitleaks/v8/config"
- "github.com/zricethezav/gitleaks/v8/logging"
- )
// maxPeekSize is a byte limit of 25 * 1_000 = 25,000 bytes (25 kB).
// NOTE(review): presumably the read-ahead limit callers pass to
// readUntilSafeBoundary — confirm against the call sites.
const maxPeekSize = 25 * 1_000 // 25 kB

// isWhitespace is a byte-indexed lookup table: isWhitespace[b] is true exactly
// when b is a space, tab, line feed, or carriage return. It is written as a
// composite literal so the table is populated during package variable
// initialization, before any init function runs.
var isWhitespace = [256]bool{
	' ':  true,
	'\t': true,
	'\n': true,
	'\r': true,
}

// isWindows is true when the binary was built for Windows (runtime.GOOS).
var isWindows = runtime.GOOS == "windows"
- // isArchive does a light check to see if the provided path is an archive or
- // compressed file. The File source already does this, so this exists mainly
- // to avoid expensive calls before sending things to the File source
- func isArchive(ctx context.Context, path string) bool {
- format, _, err := archives.Identify(ctx, path, nil)
- return err == nil && format != nil
- }
- // shouldSkipPath checks a path against all the allowlists to see if it can
- // be skipped
- func shouldSkipPath(cfg *config.Config, path string) bool {
- if cfg == nil {
- logging.Trace().Str("path", path).Msg("not skipping path because config is nil")
- return false
- }
- for _, a := range cfg.Allowlists {
- if a.PathAllowed(path) ||
- // TODO: Remove this in v9.
- // This is an awkward hack to mitigate https://github.com/gitleaks/gitleaks/issues/1641.
- (isWindows && a.PathAllowed(filepath.ToSlash(path))) {
- return true
- }
- }
- return false
- }
- // readUntilSafeBoundary consumes |f| until it finds two consecutive `\n` characters, up to |maxPeekSize|.
- // This hopefully avoids splitting. (https://github.com/gitleaks/gitleaks/issues/1651)
- func readUntilSafeBoundary(r *bufio.Reader, n int, maxPeekSize int, peekBuf *bytes.Buffer) error {
- if peekBuf.Len() == 0 {
- return nil
- }
- // Does the buffer end in consecutive newlines?
- var (
- data = peekBuf.Bytes()
- lastChar = data[len(data)-1]
- newlineCount = 0 // Tracks consecutive newlines
- )
- if isWhitespace[lastChar] {
- for i := len(data) - 1; i >= 0; i-- {
- lastChar = data[i]
- if lastChar == '\n' {
- newlineCount++
- // Stop if two consecutive newlines are found
- if newlineCount >= 2 {
- return nil
- }
- } else if isWhitespace[lastChar] {
- // The presence of other whitespace characters (`\r`, ` `, `\t`) shouldn't reset the count.
- // (Intentionally do nothing.)
- } else {
- break
- }
- }
- }
- // If not, read ahead until we (hopefully) find some.
- newlineCount = 0
- for {
- data = peekBuf.Bytes()
- // Check if the last character is a newline.
- lastChar = data[len(data)-1]
- if lastChar == '\n' {
- newlineCount++
- // Stop if two consecutive newlines are found
- if newlineCount >= 2 {
- break
- }
- } else if isWhitespace[lastChar] {
- // The presence of other whitespace characters (`\r`, ` `, `\t`) shouldn't reset the count.
- // (Intentionally do nothing.)
- } else {
- newlineCount = 0 // Reset if a non-newline character is found
- }
- // Stop growing the buffer if it reaches maxSize
- if (peekBuf.Len() - n) >= maxPeekSize {
- break
- }
- // Read additional data into a temporary buffer
- b, err := r.ReadByte()
- if err != nil {
- if err == io.EOF {
- break
- }
- return err
- }
- peekBuf.WriteByte(b)
- }
- return nil
- }
|