package detect

import (
	"io"
	"os"
	"strings"

	"github.com/h2non/filetype"
	"github.com/rs/zerolog/log"
	"github.com/zricethezav/gitleaks/v8/report"
	"github.com/zricethezav/gitleaks/v8/sources"
)

func (d *Detector) DetectFiles(paths <-chan sources.ScanTarget) ([]report.Finding, error) {
	for pa := range paths {
		p := pa
		d.Sema.Go(func() error {

			f, err := os.Open(p.Path)
			if err != nil {
				return err
			}
			defer f.Close()

			// Get file size
			fileInfo, err := f.Stat()
			if err != nil {
				return err
			}
			fileSize := fileInfo.Size()
			if d.MaxTargetMegaBytes > 0 {
				rawLength := fileSize / 1000000
				if rawLength > int64(d.MaxTargetMegaBytes) {
					log.Debug().Msgf("skipping file: %s scan due to size: %d", p.Path, rawLength)
					return nil
				}
			}

			// Buffer to hold file chunks
			buf := make([]byte, chunkSize)
			totalLines := 0
			for {
				n, err := f.Read(buf)
				if err != nil && err != io.EOF {
					return err
				}
				if n == 0 {
					break
				}

				// TODO: optimization could be introduced here
				mimetype, err := filetype.Match(buf[:n])
				if err != nil {
					return err
				}
				if mimetype.MIME.Type == "application" {
					return nil // skip binary files
				}

				// Count the number of newlines in this chunk
				linesInChunk := strings.Count(string(buf[:n]), "\n")
				totalLines += linesInChunk
				fragment := Fragment{
					Raw:      string(buf[:n]),
					FilePath: p.Path,
				}
				if p.Symlink != "" {
					fragment.SymlinkFile = p.Symlink
				}
				for _, finding := range d.Detect(fragment) {
					// need to add 1 since line counting starts at 1
					finding.StartLine += (totalLines - linesInChunk) + 1
					finding.EndLine += (totalLines - linesInChunk) + 1
					d.addFinding(finding)
				}
			}

			return nil
		})
	}

	if err := d.Sema.Wait(); err != nil {
		return d.findings, err
	}

	return d.findings, nil
}