files.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. package sources
  2. import (
  3. "context"
  4. "errors"
  5. "io/fs"
  6. "os"
  7. "path/filepath"
  8. "sync"
  9. "github.com/fatih/semgroup"
  10. "github.com/zricethezav/gitleaks/v8/config"
  11. "github.com/zricethezav/gitleaks/v8/logging"
  12. )
  13. // TODO: remove this in v9 and have scanTargets yield file sources
  14. type ScanTarget struct {
  15. Path string
  16. Symlink string
  17. }
  18. // Deprecated: Use Files and detector.DetectSource instead
  19. func DirectoryTargets(sourcePath string, s *semgroup.Group, followSymlinks bool, allowlists []*config.Allowlist) (<-chan ScanTarget, error) {
  20. paths := make(chan ScanTarget)
  21. // create a Files source
  22. files := Files{
  23. FollowSymlinks: followSymlinks,
  24. Path: sourcePath,
  25. Sema: s,
  26. Config: &config.Config{
  27. Allowlists: allowlists,
  28. },
  29. }
  30. s.Go(func() error {
  31. err := files.scanTargets(func(scanTarget ScanTarget, err error) error {
  32. paths <- scanTarget
  33. return nil
  34. })
  35. close(paths)
  36. return err
  37. })
  38. return paths, nil
  39. }
  40. // Files is a source for yielding fragments from a collection of files
  41. type Files struct {
  42. Config *config.Config
  43. FollowSymlinks bool
  44. MaxFileSize int
  45. Path string
  46. Sema *semgroup.Group
  47. MaxArchiveDepth int
  48. }
  49. // scanTargets yields scan targets to a callback func
  50. func (s *Files) scanTargets(yield func(ScanTarget, error) error) error {
  51. return filepath.WalkDir(s.Path, func(path string, d fs.DirEntry, err error) error {
  52. scanTarget := ScanTarget{Path: path}
  53. logger := logging.With().Str("path", path).Logger()
  54. if err != nil {
  55. if os.IsPermission(err) {
  56. // This seems to only fail on directories at this stage.
  57. logger.Warn().Err(errors.New("permission denied")).Msg("skipping directory")
  58. return filepath.SkipDir
  59. }
  60. logger.Warn().Err(err).Msg("skipping")
  61. return nil
  62. }
  63. info, err := d.Info()
  64. if err != nil {
  65. if d.IsDir() {
  66. logger.Error().Err(err).Msg("skipping directory: could not get info")
  67. return filepath.SkipDir
  68. }
  69. logger.Error().Err(err).Msg("skipping file: could not get info")
  70. return nil
  71. }
  72. if !d.IsDir() {
  73. // Empty; nothing to do here.
  74. if info.Size() == 0 {
  75. logger.Debug().Msg("skipping empty file")
  76. return nil
  77. }
  78. // Too large; nothing to do here.
  79. if s.MaxFileSize > 0 && info.Size() > int64(s.MaxFileSize) {
  80. logger.Warn().Msgf(
  81. "skipping file: too large max_size=%dMB, size=%dMB",
  82. s.MaxFileSize/1_000_000, info.Size()/1_000_000,
  83. )
  84. return nil
  85. }
  86. }
  87. // set the initial scan target values
  88. if d.Type() == fs.ModeSymlink {
  89. if !s.FollowSymlinks {
  90. logger.Debug().Msg("skipping symlink: follow symlinks disabled")
  91. return nil
  92. }
  93. realPath, err := filepath.EvalSymlinks(path)
  94. if err != nil {
  95. logger.Error().Err(err).Msg("skipping symlink: could not evaluate")
  96. return nil
  97. }
  98. if realPathFileInfo, _ := os.Stat(realPath); realPathFileInfo.IsDir() {
  99. logger.Debug().Str("target", realPath).Msgf("skipping symlink: target is directory")
  100. return nil
  101. }
  102. scanTarget = ScanTarget{
  103. Path: realPath,
  104. Symlink: path,
  105. }
  106. }
  107. // handle dir cases (mainly just see if it should be skipped
  108. if info.IsDir() {
  109. if shouldSkipPath(s.Config, path) {
  110. logger.Debug().Msg("skipping directory: global allowlist")
  111. return filepath.SkipDir
  112. }
  113. return nil
  114. }
  115. if shouldSkipPath(s.Config, path) {
  116. logger.Debug().Msg("skipping file: global allowlist")
  117. return nil
  118. }
  119. return yield(scanTarget, nil)
  120. })
  121. }
  122. // Fragments yields fragments from files discovered under the path
  123. func (s *Files) Fragments(ctx context.Context, yield FragmentsFunc) error {
  124. var wg sync.WaitGroup
  125. err := s.scanTargets(func(scanTarget ScanTarget, err error) error {
  126. wg.Add(1)
  127. s.Sema.Go(func() error {
  128. logger := logging.With().Str("path", scanTarget.Path).Logger()
  129. logger.Trace().Msg("scanning path")
  130. f, err := os.Open(scanTarget.Path)
  131. if err != nil {
  132. if os.IsPermission(err) {
  133. logger.Warn().Msg("skipping file: permission denied")
  134. }
  135. wg.Done()
  136. return nil
  137. }
  138. // Convert this to a file source
  139. file := File{
  140. Content: f,
  141. Path: scanTarget.Path,
  142. Symlink: scanTarget.Symlink,
  143. Config: s.Config,
  144. MaxArchiveDepth: s.MaxArchiveDepth,
  145. }
  146. err = file.Fragments(ctx, yield)
  147. // Avoiding a defer in a hot loop
  148. _ = f.Close()
  149. wg.Done()
  150. return err
  151. })
  152. return nil
  153. })
  154. wg.Wait()
  155. return err
  156. }