files.go 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. package sources
  2. import (
  3. "context"
  4. "errors"
  5. "io/fs"
  6. "os"
  7. "path/filepath"
  8. "sync"
  9. "github.com/fatih/semgroup"
  10. "github.com/zricethezav/gitleaks/v8/config"
  11. "github.com/zricethezav/gitleaks/v8/logging"
  12. )
  // TODO: remove this in v9 and have scanTargets yield file sources

  // ScanTarget identifies a single file that should be scanned.
  type ScanTarget struct {
  	// Path is the location the file content is read from. When the target was
  	// reached through a followed symlink this is the resolved path.
  	Path string
  	// Symlink is the path of the symlink that led to Path. It is empty when
  	// the target was not reached through a symlink.
  	Symlink string
  }
  18. // Deprecated: Use Files and detector.DetectSource instead
  19. func DirectoryTargets(sourcePath string, s *semgroup.Group, followSymlinks bool, allowlists []*config.Allowlist) (<-chan ScanTarget, error) {
  20. paths := make(chan ScanTarget)
  21. // create a Files source
  22. files := Files{
  23. FollowSymlinks: followSymlinks,
  24. Path: sourcePath,
  25. Sema: s,
  26. Config: &config.Config{
  27. Allowlists: allowlists,
  28. },
  29. }
  30. s.Go(func() error {
  31. ctx := context.Background()
  32. err := files.scanTargets(ctx, func(scanTarget ScanTarget, err error) error {
  33. paths <- scanTarget
  34. return nil
  35. })
  36. close(paths)
  37. return err
  38. })
  39. return paths, nil
  40. }
  41. // Files is a source for yielding fragments from a collection of files
  42. type Files struct {
  43. Config *config.Config
  44. FollowSymlinks bool
  45. MaxFileSize int
  46. Path string
  47. Sema *semgroup.Group
  48. MaxArchiveDepth int
  49. }
  50. // scanTargets yields scan targets to a callback func
  51. func (s *Files) scanTargets(ctx context.Context, yield func(ScanTarget, error) error) error {
  52. return filepath.WalkDir(s.Path, func(path string, d fs.DirEntry, err error) error {
  53. scanTarget := ScanTarget{Path: path}
  54. logger := logging.With().Str("path", path).Logger()
  55. if err != nil {
  56. if os.IsPermission(err) {
  57. // This seems to only fail on directories at this stage.
  58. logger.Warn().Err(errors.New("permission denied")).Msg("skipping directory")
  59. return filepath.SkipDir
  60. }
  61. logger.Warn().Err(err).Msg("skipping")
  62. return nil
  63. }
  64. info, err := d.Info()
  65. if err != nil {
  66. if d.IsDir() {
  67. logger.Error().Err(err).Msg("skipping directory: could not get info")
  68. return filepath.SkipDir
  69. }
  70. logger.Error().Err(err).Msg("skipping file: could not get info")
  71. return nil
  72. }
  73. if !d.IsDir() {
  74. // Empty; nothing to do here.
  75. if info.Size() == 0 {
  76. logger.Debug().Msg("skipping empty file")
  77. return nil
  78. }
  79. // Too large; nothing to do here.
  80. if s.MaxFileSize > 0 && info.Size() > int64(s.MaxFileSize) {
  81. logger.Warn().Msgf(
  82. "skipping file: too large max_size=%dMB, size=%dMB",
  83. s.MaxFileSize/1_000_000, info.Size()/1_000_000,
  84. )
  85. return nil
  86. }
  87. }
  88. // set the initial scan target values
  89. if d.Type() == fs.ModeSymlink {
  90. if !s.FollowSymlinks {
  91. logger.Debug().Msg("skipping symlink: follow symlinks disabled")
  92. return nil
  93. }
  94. realPath, err := filepath.EvalSymlinks(path)
  95. if err != nil {
  96. logger.Error().Err(err).Msg("skipping symlink: could not evaluate")
  97. return nil
  98. }
  99. if realPathFileInfo, _ := os.Stat(realPath); realPathFileInfo.IsDir() {
  100. logger.Debug().Str("target", realPath).Msgf("skipping symlink: target is directory")
  101. return nil
  102. }
  103. scanTarget = ScanTarget{
  104. Path: realPath,
  105. Symlink: path,
  106. }
  107. }
  108. // handle dir cases (mainly just see if it should be skipped
  109. if info.IsDir() {
  110. if shouldSkipPath(s.Config, path) {
  111. logger.Debug().Msg("skipping directory: global allowlist")
  112. return filepath.SkipDir
  113. }
  114. return nil
  115. }
  116. if shouldSkipPath(s.Config, path) {
  117. logger.Debug().Msg("skipping file: global allowlist")
  118. return nil
  119. }
  120. return yield(scanTarget, nil)
  121. })
  122. }
  123. // Fragments yields fragments from files discovered under the path
  124. func (s *Files) Fragments(ctx context.Context, yield FragmentsFunc) error {
  125. var wg sync.WaitGroup
  126. err := s.scanTargets(ctx, func(scanTarget ScanTarget, err error) error {
  127. select {
  128. case <-ctx.Done():
  129. return ctx.Err()
  130. default:
  131. wg.Add(1)
  132. s.Sema.Go(func() error {
  133. logger := logging.With().Str("path", scanTarget.Path).Logger()
  134. logger.Trace().Msg("scanning path")
  135. f, err := os.Open(scanTarget.Path)
  136. if err != nil {
  137. if os.IsPermission(err) {
  138. logger.Warn().Msg("skipping file: permission denied")
  139. }
  140. wg.Done()
  141. return nil
  142. }
  143. // Convert this to a file source
  144. file := File{
  145. Content: f,
  146. Path: scanTarget.Path,
  147. Symlink: scanTarget.Symlink,
  148. Config: s.Config,
  149. MaxArchiveDepth: s.MaxArchiveDepth,
  150. }
  151. err = file.Fragments(ctx, yield)
  152. // Avoiding a defer in a hot loop
  153. _ = f.Close()
  154. wg.Done()
  155. return err
  156. })
  157. return nil
  158. }
  159. })
  160. select {
  161. case <-ctx.Done():
  162. return ctx.Err()
  163. default:
  164. wg.Wait()
  165. return err
  166. }
  167. }