Parcourir la source

dont crash on 100gb files pls (#1292)

Zachary Rice il y a 2 ans
Parent
commit
dab7d0292a
1 fichier modifié avec 39 ajouts et 20 suppressions
  1. 39 20
      detect/detect.go

+ 39 - 20
detect/detect.go

@@ -37,6 +37,7 @@ const (
 	ProtectStagedType
 
 	gitleaksAllowSignature = "gitleaks:allow"
+	chunkSize              = 10 * 1_000 // 10kb
 )
 
 // Detector is the main detector struct
@@ -502,31 +503,49 @@ func (d *Detector) DetectFiles(source string) ([]report.Finding, error) {
 	for pa := range paths {
 		p := pa
 		s.Go(func() error {
-			b, err := os.ReadFile(p.Path)
+			f, err := os.Open(p.Path)
 			if err != nil {
 				return err
 			}
+			defer f.Close()
+
+			// Buffer to hold file chunks
+			buf := make([]byte, chunkSize)
+			totalLines := 0
+			for {
+				n, err := f.Read(buf)
+				if err != nil && err != io.EOF {
+					return err
+				}
+				if n == 0 {
+					break
+				}
 
-			mimetype, err := filetype.Match(b)
-			if err != nil {
-				return err
-			}
-			if mimetype.MIME.Type == "application" {
-				return nil // skip binary files
-			}
+				// TODO: optimization could be introduced here
+				mimetype, err := filetype.Match(buf[:n])
+				if err != nil {
+					return err
+				}
+				if mimetype.MIME.Type == "application" {
+					return nil // skip binary files
+				}
 
-			fragment := Fragment{
-				Raw:      string(b),
-				FilePath: p.Path,
-			}
-			if p.Symlink != "" {
-				fragment.SymlinkFile = p.Symlink
-			}
-			for _, finding := range d.Detect(fragment) {
-				// need to add 1 since line counting starts at 1
-				finding.EndLine++
-				finding.StartLine++
-				d.addFinding(finding)
+				// Count the number of newlines in this chunk
+				linesInChunk := strings.Count(string(buf[:n]), "\n")
+				totalLines += linesInChunk
+				fragment := Fragment{
+					Raw:      string(buf[:n]),
+					FilePath: p.Path,
+				}
+				if p.Symlink != "" {
+					fragment.SymlinkFile = p.Symlink
+				}
+				for _, finding := range d.Detect(fragment) {
+					// need to add 1 since line counting starts at 1
+					finding.StartLine += (totalLines - linesInChunk) + 1
+					finding.EndLine += (totalLines - linesInChunk) + 1
+					d.addFinding(finding)
+				}
 			}
 
 			return nil