| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455 |
- package scan
- import (
- "bytes"
- "fmt"
- "io"
- "sync"
- "time"
- "github.com/zricethezav/gitleaks/v5/manager"
- "github.com/go-git/go-git/v5"
- "github.com/go-git/go-git/v5/plumbing"
- fdiff "github.com/go-git/go-git/v5/plumbing/format/diff"
- "github.com/go-git/go-git/v5/plumbing/object"
- "github.com/go-git/go-git/v5/plumbing/storer"
- "github.com/sergi/go-diff/diffmatchpatch"
- log "github.com/sirupsen/logrus"
- )
- // Bundle contains various git information for scans.
- type Bundle struct {
- Commit *object.Commit
- Patch string
- Content string
- FilePath string
- Operation fdiff.Operation
- reader io.Reader
- lineLookup map[string]bool
- scanType int
- }
- // commitScanner is a function signature for scanning commits. There is some
- // redundant work needed by scanning all files at a commit (--files-at-commit=) and scanning
- // the patches generated by a commit (--commit=). The function scanCommit wraps that redundant work
- // and accepts a commitScanner for the different logic needed between the two cases described above.
- type commitScanner func(c *object.Commit, repo *Repo) error
// Scan kinds stored in Bundle.scanType. The line-searching logic differs
// between scanning patches, uncommitted files, and files at a commit, so each
// Bundle records which kind of scan produced it. Zero is intentionally not a
// valid kind.
const (
	patchScan       int = 1
	uncommittedScan int = 2
	commitScan      int = 3
)
- // Scan is responsible for scanning the entire history (default behavior) of a
- // git repo. Options that can change the behavior of this function include: --Commit, --depth, --branch.
- // See options/options.go for an explanation on these options.
- func (repo *Repo) Scan() error {
- if err := repo.setupTimeout(); err != nil {
- return err
- }
- if repo.cancel != nil {
- defer repo.cancel()
- }
- if repo.Repository == nil {
- return fmt.Errorf("%s repo is empty", repo.Name)
- }
- // load up alternative config if possible, if not use manager's config
- if repo.Manager.Opts.RepoConfig {
- cfg, err := repo.loadRepoConfig()
- if err != nil {
- return err
- }
- repo.config = cfg
- }
- scanTimeStart := time.Now()
- // scan Commit patches OR all files at Commit. See https://github.com/zricethezav/gitleaks/issues/326
- if repo.Manager.Opts.Commit != "" {
- return scanCommit(repo.Manager.Opts.Commit, repo, scanCommitPatches)
- } else if repo.Manager.Opts.FilesAtCommit != "" {
- return scanCommit(repo.Manager.Opts.FilesAtCommit, repo, scanFilesAtCommit)
- }
- logOpts, err := getLogOptions(repo)
- if err != nil {
- return err
- }
- cIter, err := repo.Log(logOpts)
- if err != nil {
- return err
- }
- cc := 0
- semaphore := make(chan bool, howManyThreads(repo.Manager.Opts.Threads))
- wg := sync.WaitGroup{}
- err = cIter.ForEach(func(c *object.Commit) error {
- if c == nil || repo.timeoutReached() || repo.depthReached(cc) {
- return storer.ErrStop
- }
- // Check if Commit is allowlisted
- if isCommitWhiteListed(c.Hash.String(), repo.config.Allowlist.Commits) {
- return nil
- }
- // Check if at root
- if len(c.ParentHashes) == 0 {
- cc++
- err = scanFilesAtCommit(c, repo)
- if err != nil {
- return err
- }
- return nil
- }
- // increase Commit counter
- cc++
- err = c.Parents().ForEach(func(parent *object.Commit) error {
- defer func() {
- if err := recover(); err != nil {
- // sometimes the Patch generation will fail due to a known bug in
- // sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
- // Once a fix has been merged I will remove this recover.
- return
- }
- }()
- if repo.timeoutReached() {
- return nil
- }
- if parent == nil {
- // shouldn't reach this point but just in case
- return nil
- }
- start := time.Now()
- patch, err := parent.Patch(c)
- if err != nil {
- return fmt.Errorf("could not generate Patch")
- }
- repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
- wg.Add(1)
- semaphore <- true
- go func(c *object.Commit, patch *object.Patch) {
- defer func() {
- <-semaphore
- wg.Done()
- }()
- scanPatch(patch, c, repo)
- }(c, patch)
- return nil
- })
- if c.Hash.String() == repo.Manager.Opts.CommitTo {
- return storer.ErrStop
- }
- return nil
- })
- wg.Wait()
- repo.Manager.RecordTime(manager.ScanTime(howLong(scanTimeStart)))
- repo.Manager.IncrementCommits(cc)
- return nil
- }
- // scanEmpty scans an empty repo without any commits. See https://github.com/zricethezav/gitleaks/issues/352
- func (repo *Repo) scanEmpty() error {
- scanTimeStart := time.Now()
- wt, err := repo.Worktree()
- if err != nil {
- return err
- }
- status, err := wt.Status()
- for fn := range status {
- workTreeBuf := bytes.NewBuffer(nil)
- workTreeFile, err := wt.Filesystem.Open(fn)
- if err != nil {
- continue
- }
- if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
- return err
- }
- repo.CheckRules(&Bundle{
- Content: workTreeBuf.String(),
- FilePath: workTreeFile.Name(),
- Commit: emptyCommit(),
- scanType: uncommittedScan,
- })
- }
- repo.Manager.RecordTime(manager.ScanTime(howLong(scanTimeStart)))
- return nil
- }
- // scanUncommitted will do a `git diff` and scan changed files that are being tracked. This is useful functionality
- // for a pre-Commit hook so you can make sure your code does not have any leaks before committing.
- func (repo *Repo) scanUncommitted() error {
- // load up alternative config if possible, if not use manager's config
- if repo.Manager.Opts.RepoConfig {
- cfg, err := repo.loadRepoConfig()
- if err != nil {
- return err
- }
- repo.config = cfg
- }
- if err := repo.setupTimeout(); err != nil {
- return err
- }
- r, err := repo.Head()
- if err == plumbing.ErrReferenceNotFound {
- // possibly an empty repo, or maybe its not, either way lets scan all the files in the directory
- return repo.scanEmpty()
- } else if err != nil {
- return err
- }
- scanTimeStart := time.Now()
- c, err := repo.CommitObject(r.Hash())
- if err != nil {
- return err
- }
- // Staged change so the Commit details do not yet exist. Insert empty defaults.
- c.Hash = plumbing.Hash{}
- c.Message = "***STAGED CHANGES***"
- c.Author.Name = ""
- c.Author.Email = ""
- c.Author.When = time.Unix(0, 0).UTC()
- prevTree, err := c.Tree()
- if err != nil {
- return err
- }
- wt, err := repo.Worktree()
- if err != nil {
- return err
- }
- status, err := wt.Status()
- for fn, state := range status {
- var (
- prevFileContents string
- currFileContents string
- filename string
- )
- if state.Staging != git.Untracked {
- if state.Staging == git.Deleted {
- // file in staging has been deleted, aka it is not on the filesystem
- // so the contents of the file are ""
- currFileContents = ""
- } else {
- workTreeBuf := bytes.NewBuffer(nil)
- workTreeFile, err := wt.Filesystem.Open(fn)
- if err != nil {
- continue
- }
- if _, err := io.Copy(workTreeBuf, workTreeFile); err != nil {
- return err
- }
- currFileContents = workTreeBuf.String()
- filename = workTreeFile.Name()
- }
- // get files at HEAD state
- prevFile, err := prevTree.File(fn)
- if err != nil {
- prevFileContents = ""
- } else {
- prevFileContents, err = prevFile.Contents()
- if err != nil {
- return err
- }
- if filename == "" {
- filename = prevFile.Name
- }
- }
- diffs := diffmatchpatch.New().DiffMain(prevFileContents, currFileContents, false)
- var diffContents string
- for _, d := range diffs {
- if d.Type == diffmatchpatch.DiffInsert {
- diffContents += fmt.Sprintf("%s\n", d.Text)
- }
- }
- repo.CheckRules(&Bundle{
- Content: diffContents,
- FilePath: filename,
- Commit: c,
- scanType: uncommittedScan,
- })
- }
- }
- if err != nil {
- return err
- }
- repo.Manager.RecordTime(manager.ScanTime(howLong(scanTimeStart)))
- return nil
- }
- // scan accepts a Patch, Commit, and repo. If the patches contains files that are
- // binary, then gitleaks will skip scanning that file OR if a file is matched on
- // allowlisted files set in the configuration. If a global rule for files is defined and a filename
- // matches said global rule, then a leak is sent to the manager.
- // After that, file chunks are created which are then inspected by InspectString()
- func scanPatch(patch *object.Patch, c *object.Commit, repo *Repo) {
- bundle := Bundle{
- Commit: c,
- Patch: patch.String(),
- scanType: patchScan,
- }
- for _, f := range patch.FilePatches() {
- if repo.timeoutReached() {
- return
- }
- if f.IsBinary() {
- continue
- }
- for _, chunk := range f.Chunks() {
- if chunk.Type() == fdiff.Add || (repo.Manager.Opts.Deletion && chunk.Type() == fdiff.Delete) {
- bundle.Content = chunk.Content()
- bundle.Operation = chunk.Type()
- // get filepath
- from, to := f.Files()
- if from != nil {
- bundle.FilePath = from.Path()
- } else if to != nil {
- bundle.FilePath = to.Path()
- } else {
- bundle.FilePath = "???"
- }
- repo.CheckRules(&bundle)
- }
- }
- }
- }
- // scanCommit accepts a Commit hash, repo, and commit scanning function. A new Commit
- // object will be created from the hash which will be passed into either scanCommitPatches
- // or scanFilesAtCommit depending on the options set.
- func scanCommit(commit string, repo *Repo, f commitScanner) error {
- if commit == "latest" {
- ref, err := repo.Repository.Head()
- if err != nil {
- return err
- }
- commit = ref.Hash().String()
- }
- repo.Manager.IncrementCommits(1)
- h := plumbing.NewHash(commit)
- c, err := repo.CommitObject(h)
- if err != nil {
- return err
- }
- return f(c, repo)
- }
- // scanCommitPatches accepts a Commit object and a repo. This function is only called when the --Commit=
- // option has been set. That option tells gitleaks to look only at a single Commit and check the contents
- // of said Commit. Similar to scan(), if the files contained in the Commit are a binaries or if they are
- // allowlisted then those files will be skipped.
- func scanCommitPatches(c *object.Commit, repo *Repo) error {
- if len(c.ParentHashes) == 0 {
- err := scanFilesAtCommit(c, repo)
- if err != nil {
- return err
- }
- }
- return c.Parents().ForEach(func(parent *object.Commit) error {
- defer func() {
- if err := recover(); err != nil {
- // sometimes the Patch generation will fail due to a known bug in
- // sergi's go-diff: https://github.com/sergi/go-diff/issues/89.
- // Once a fix has been merged I will remove this recover.
- return
- }
- }()
- if repo.timeoutReached() {
- return nil
- }
- if parent == nil {
- return nil
- }
- start := time.Now()
- patch, err := parent.Patch(c)
- if err != nil {
- return fmt.Errorf("could not generate Patch")
- }
- repo.Manager.RecordTime(manager.PatchTime(howLong(start)))
- scanPatch(patch, c, repo)
- return nil
- })
- }
- // scanFilesAtCommit accepts a Commit object and a repo. This function is only called when the --files-at-Commit=
- // option has been set. That option tells gitleaks to look only at ALL the files at a Commit and check the contents
- // of said Commit. Similar to scan(), if the files contained in the Commit are a binaries or if they are
- // allowlisted then those files will be skipped.
- func scanFilesAtCommit(c *object.Commit, repo *Repo) error {
- fIter, err := c.Files()
- if err != nil {
- return err
- }
- err = fIter.ForEach(func(f *object.File) error {
- bin, err := f.IsBinary()
- if bin || repo.timeoutReached() {
- return nil
- } else if err != nil {
- return err
- }
- content, err := f.Contents()
- if err != nil {
- return err
- }
- repo.CheckRules(&Bundle{
- Content: content,
- FilePath: f.Name,
- Commit: c,
- scanType: commitScan,
- Operation: fdiff.Add,
- })
- return nil
- })
- return err
- }
- // depthReached checks if i meets the depth (--depth=) if set
- func (repo *Repo) depthReached(i int) bool {
- if repo.Manager.Opts.Depth != 0 && repo.Manager.Opts.Depth == i {
- log.Warnf("Exceeded depth limit (%d)", i)
- return true
- }
- return false
- }
- // emptyCommit generates an empty commit used for scanning uncommitted changes
- func emptyCommit() *object.Commit {
- return &object.Commit{
- Hash: plumbing.Hash{},
- Message: "***STAGED CHANGES***",
- Author: object.Signature{
- Name: "",
- Email: "",
- When: time.Unix(0, 0).UTC(),
- },
- }
- }
|