archive.go 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. package detect
  2. import (
  3. "context"
  4. "fmt"
  5. "io"
  6. "os"
  7. "path/filepath"
  8. "strings"
  9. "github.com/mholt/archives"
  10. "github.com/zricethezav/gitleaks/v8/sources"
  11. )
  12. // IsArchive asks archives.Identify (with a nil stream, so only the filename)
  13. // whether this file would be handled by an Extractor. If Identify returns
  14. // a Format implementing archives.Extractor, we treat it as an archive.
  15. func IsArchive(path string) bool {
  16. format, _, err := archives.Identify(context.Background(), path, nil)
  17. if err != nil {
  18. // no matching format at all
  19. return false
  20. }
  21. _, ok := format.(archives.Extractor)
  22. return ok
  23. }
  24. // ExtractArchive extracts all files from archivePath into a temp dir.
  25. // Returns the list of ScanTargets (with real file paths) and the temp dir for cleanup.
  26. func ExtractArchive(archivePath string) ([]sources.ScanTarget, string, error) {
  27. // 1. Create a temp dir
  28. tmpDir, err := os.MkdirTemp("", "gitleaks-archive-")
  29. if err != nil {
  30. return nil, "", err
  31. }
  32. // 2. Open the archive
  33. f, err := os.Open(archivePath)
  34. if err != nil {
  35. os.RemoveAll(tmpDir)
  36. return nil, "", err
  37. }
  38. defer f.Close()
  39. // 3. Identify format (name-based + header peek)
  40. ctx := context.Background()
  41. format, stream, err := archives.Identify(ctx, archivePath, f)
  42. if err != nil {
  43. os.RemoveAll(tmpDir)
  44. return nil, "", err
  45. }
  46. // 4. Ensure it's extractable
  47. extractor, ok := format.(archives.Extractor)
  48. if !ok {
  49. os.RemoveAll(tmpDir)
  50. return nil, "", fmt.Errorf("format %T is not extractable", format)
  51. }
  52. // 5. Walk and extract
  53. var targets []sources.ScanTarget
  54. err = extractor.Extract(ctx, stream, func(ctx context.Context, file archives.FileInfo) error {
  55. name := file.Name()
  56. // skip macOS metadata and __MACOSX folders
  57. // TODO add more exceptions here if needed
  58. base := filepath.Base(name)
  59. if strings.HasPrefix(base, "._") || strings.HasPrefix(name, "__MACOSX/") {
  60. return nil
  61. }
  62. if file.IsDir() {
  63. return nil
  64. }
  65. // open and copy out
  66. r, err := file.Open()
  67. if err != nil {
  68. return err
  69. }
  70. defer r.Close()
  71. outPath := filepath.Join(tmpDir, file.Name())
  72. if err := os.MkdirAll(filepath.Dir(outPath), 0o755); err != nil {
  73. return err
  74. }
  75. outFile, err := os.Create(outPath)
  76. if err != nil {
  77. return err
  78. }
  79. defer outFile.Close()
  80. if _, err := io.Copy(outFile, r); err != nil {
  81. return err
  82. }
  83. targets = append(targets, sources.ScanTarget{Path: outPath})
  84. return nil
  85. })
  86. return targets, tmpDir, err
  87. }