archive.go 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. package detect
  2. import (
  3. "context"
  4. "fmt"
  5. "io"
  6. "os"
  7. "path/filepath"
  8. "strings"
  9. "github.com/mholt/archives"
  10. "github.com/zricethezav/gitleaks/v8/sources"
  11. )
  12. // isArchive asks archives.Identify (with a nil stream, so only the filename)
  13. // whether this file would be handled by an Extractor. If Identify returns
  14. // a Format implementing archives.Extractor, we treat it as an archive.
  15. func isArchive(path string) bool {
  16. format, _, err := archives.Identify(context.Background(), path, nil)
  17. if err != nil {
  18. // no matching format at all
  19. return false
  20. }
  21. _, ok := format.(archives.Extractor)
  22. return ok
  23. }
  24. // ExtractArchive extracts all files from archivePath into a temp dir.
  25. // Returns the list of ScanTargets (with real file paths) and the temp dir for cleanup.
  26. func extractArchive(archivePath string) ([]sources.ScanTarget, string, error) {
  27. tmpArchiveDir, err := os.MkdirTemp(tmpDir, "archive-*")
  28. if err != nil {
  29. return nil, "", fmt.Errorf("creating temp dir for archive: %w", err)
  30. }
  31. f, err := os.Open(archivePath)
  32. if err != nil {
  33. os.RemoveAll(tmpArchiveDir)
  34. return nil, "", err
  35. }
  36. defer f.Close()
  37. ctx := context.Background()
  38. format, stream, err := archives.Identify(ctx, archivePath, f)
  39. if err != nil {
  40. os.RemoveAll(tmpArchiveDir)
  41. return nil, "", err
  42. }
  43. extractor, ok := format.(archives.Extractor)
  44. if !ok {
  45. os.RemoveAll(tmpArchiveDir)
  46. return nil, "", fmt.Errorf("format %T is not extractable", format)
  47. }
  48. // Walk and extract
  49. var targets []sources.ScanTarget
  50. err = extractor.Extract(ctx, stream, func(ctx context.Context, file archives.FileInfo) error {
  51. name := file.Name()
  52. // skip macOS metadata and __MACOSX folders
  53. // TODO add more exceptions here if needed
  54. base := filepath.Base(name)
  55. if strings.HasPrefix(base, "._") || strings.HasPrefix(name, "__MACOSX/") {
  56. return nil
  57. }
  58. if file.IsDir() {
  59. return nil
  60. }
  61. // open and copy out
  62. r, err := file.Open()
  63. if err != nil {
  64. return err
  65. }
  66. defer r.Close()
  67. outPath := filepath.Join(tmpArchiveDir, file.Name())
  68. if err := os.MkdirAll(filepath.Dir(outPath), 0o755); err != nil {
  69. return err
  70. }
  71. outFile, err := os.Create(outPath)
  72. if err != nil {
  73. return err
  74. }
  75. defer outFile.Close()
  76. if _, err := io.Copy(outFile, r); err != nil {
  77. return err
  78. }
  79. targets = append(targets, sources.ScanTarget{Path: outPath})
  80. return nil
  81. })
  82. return targets, tmpArchiveDir, err
  83. }