manager.go 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. package manager
  2. import (
  3. "crypto/sha1"
  4. "encoding/hex"
  5. "encoding/json"
  6. "fmt"
  7. "os"
  8. "os/signal"
  9. "runtime"
  10. "strings"
  11. "sync"
  12. "text/tabwriter"
  13. "time"
  14. "github.com/zricethezav/gitleaks/v6/config"
  15. "github.com/zricethezav/gitleaks/v6/options"
  16. "github.com/go-git/go-git/v5"
  17. "github.com/hako/durafmt"
  18. "github.com/mattn/go-colorable"
  19. log "github.com/sirupsen/logrus"
  20. )
// maxLineLen is the length threshold, in bytes, above which SendLeaks shortens
// a leak's Line and Offender before the leak is stored.
const maxLineLen = 200
// Manager is a struct containing options and configs as well as CloneOptions and CloneDir.
// Per the original note, this struct is passed into each NewRepo (defined outside this file)
// so that options and configs are not passed around individually in func params.
type Manager struct {
	// Opts holds the options the manager was created with.
	Opts options.Options
	// Config holds the gitleaks config the manager was created with.
	Config config.Config
	// CloneOptions is filled in by NewManager from Opts.CloneOptions().
	CloneOptions *git.CloneOptions
	// CloneDir is not assigned anywhere in this file; presumably the directory
	// repositories are cloned into — TODO confirm against callers.
	CloneDir string
	// leaks accumulates the de-duplicated leaks consumed from leakChan.
	leaks []Leak
	// leakChan carries leaks from SendLeaks to the receiveLeaks goroutine.
	leakChan chan Leak
	// leakWG counts leaks that were sent but not yet processed; GetLeaks waits on it.
	leakWG *sync.WaitGroup
	// leakCache records the lookupHash of every leak seen so far (see alreadySeen).
	leakCache map[string]bool
	// stopChan is registered with signal.Notify for os.Interrupt.
	stopChan chan os.Signal
	// metadata collects timings and commit counts for the scan.
	metadata Metadata
	// metaWG counts recorded timings not yet processed; GetMetadata waits on it.
	metaWG *sync.WaitGroup
}
// Leak is a struct that contains information about some line of code that contains
// sensitive information as determined by the rules set in a gitleaks config.
// The struct tags give the field names used when a leak is marshaled to JSON.
type Leak struct {
	// Line is the text of the offending line. SendLeaks shortens it when it is
	// longer than maxLineLen.
	Line       string `json:"line"`
	LineNumber int    `json:"lineNumber"`
	// Offender is the offending text; SendLeaks shortens it like Line and, when
	// the Redact option is set, overwrites it with "REDACTED".
	Offender string `json:"offender"`
	Commit   string `json:"commit"`
	Repo     string `json:"repo"`
	Rule     string `json:"rule"`
	// Message is serialized under the key "commitMessage".
	Message   string    `json:"commitMessage"`
	Author    string    `json:"author"`
	Email     string    `json:"email"`
	File      string    `json:"file"`
	Date      time.Time `json:"date"`
	Tags      string    `json:"tags"`
	Operation string    `json:"operation"`
	// lookupHash is the hex-encoded SHA-1 key computed by SendLeaks and checked by
	// alreadySeen to drop duplicates. It is unexported, so encoding/json skips it.
	lookupHash string
}
  55. // ScanTime is a type used to determine total scan time
  56. type ScanTime int64
  57. // PatchTime is a type used to determine total patch time during an scan
  58. type PatchTime int64
  59. // CloneTime is a type used to determine total clone time
  60. type CloneTime int64
// RegexTime is a type used to determine the time each rule's regex takes. This is especially useful
// if you notice that gitleaks is taking a long time. You can use --debug to see the output of the regexTime
// so you can determine which regex is not performing well.
type RegexTime struct {
	// Time is the amount of time to add to the running total kept for Regex.
	Time int64
	// Regex is the key under which receiveMetadata accumulates Time.
	Regex string
}
// Metadata is a struct used to communicate metadata about a scan like timings and total commit counts.
type Metadata struct {
	// mux is held by IncrementCommits while Commits is updated.
	mux *sync.Mutex
	// data is allocated by NewManager but is not otherwise read or written in this file.
	data map[string]interface{}
	// timings carries the values passed to RecordTime over to receiveMetadata.
	timings chan interface{}
	// RegexTime maps a regex to its accumulated time; DebugOutput prints the
	// values as nanosecond durations.
	RegexTime map[string]int64
	// Commits is the running total maintained by IncrementCommits.
	Commits int
	// ScanTime, patchTime and cloneTime are the sums of the ScanTime, PatchTime
	// and CloneTime values consumed by receiveMetadata; DebugOutput prints them
	// as nanosecond durations.
	ScanTime  int64
	patchTime int64
	cloneTime int64
}
  79. func init() {
  80. log.SetOutput(os.Stdout)
  81. log.SetFormatter(&log.TextFormatter{
  82. ForceColors: true,
  83. FullTimestamp: true,
  84. })
  85. // Fix colors on Windows
  86. if runtime.GOOS == "windows" {
  87. log.SetOutput(colorable.NewColorableStdout())
  88. }
  89. }
  90. // NewManager accepts options and returns a manager struct. The manager is a container for gitleaks configurations,
  91. // options and channel receivers.
  92. func NewManager(opts options.Options, cfg config.Config) (*Manager, error) {
  93. cloneOpts, err := opts.CloneOptions()
  94. if err != nil {
  95. return nil, err
  96. }
  97. m := &Manager{
  98. Opts: opts,
  99. Config: cfg,
  100. CloneOptions: cloneOpts,
  101. stopChan: make(chan os.Signal, 1),
  102. leakChan: make(chan Leak),
  103. leakWG: &sync.WaitGroup{},
  104. leakCache: make(map[string]bool),
  105. metaWG: &sync.WaitGroup{},
  106. metadata: Metadata{
  107. RegexTime: make(map[string]int64),
  108. timings: make(chan interface{}),
  109. data: make(map[string]interface{}),
  110. mux: new(sync.Mutex),
  111. },
  112. }
  113. signal.Notify(m.stopChan, os.Interrupt)
  114. // start receiving leaks and metadata
  115. go m.receiveLeaks()
  116. go m.receiveMetadata()
  117. go m.receiveInterrupt()
  118. return m, nil
  119. }
  120. // GetLeaks returns all available leaks
  121. func (manager *Manager) GetLeaks() []Leak {
  122. // need to wait for any straggling leaks
  123. manager.leakWG.Wait()
  124. return manager.leaks
  125. }
  126. // SendLeaks accepts a leak and is used by the scan pkg. This is the public function
  127. // that allows other packages to send leaks to the manager.
  128. func (manager *Manager) SendLeaks(l Leak) {
  129. if len(l.Line) > maxLineLen {
  130. l.Line = l.Line[0:maxLineLen-1] + "..."
  131. }
  132. if len(l.Offender) > maxLineLen {
  133. l.Offender = l.Offender[0:maxLineLen-1] + "..."
  134. }
  135. h := sha1.New()
  136. h.Write([]byte(l.Commit + l.Offender + l.File + l.Line + string(l.LineNumber)))
  137. l.lookupHash = hex.EncodeToString(h.Sum(nil))
  138. if manager.Opts.Redact {
  139. l.Line = strings.ReplaceAll(l.Line, l.Offender, "REDACTED")
  140. l.Offender = "REDACTED"
  141. }
  142. manager.leakWG.Add(1)
  143. manager.leakChan <- l
  144. }
  145. func (manager *Manager) alreadySeen(leak Leak) bool {
  146. if _, ok := manager.leakCache[leak.lookupHash]; ok {
  147. return true
  148. }
  149. manager.leakCache[leak.lookupHash] = true
  150. return false
  151. }
  152. // receiveLeaks listens to leakChan for incoming leaks. If any are received, they are appended to the
  153. // manager's leaks for future reporting. If the -v/--verbose option is set the leaks will marshaled into
  154. // json and printed out.
  155. func (manager *Manager) receiveLeaks() {
  156. for leak := range manager.leakChan {
  157. if manager.alreadySeen(leak) {
  158. manager.leakWG.Done()
  159. continue
  160. }
  161. manager.leaks = append(manager.leaks, leak)
  162. if manager.Opts.Verbose {
  163. var b []byte
  164. if manager.Opts.PrettyPrint {
  165. b, _ = json.MarshalIndent(leak, "", " ")
  166. } else {
  167. b, _ = json.Marshal(leak)
  168. }
  169. fmt.Println(string(b))
  170. }
  171. manager.leakWG.Done()
  172. }
  173. }
  174. // GetMetadata returns the metadata. TODO this may not need to be private
  175. func (manager *Manager) GetMetadata() Metadata {
  176. manager.metaWG.Wait()
  177. return manager.metadata
  178. }
  179. // receiveMetadata is where the messages sent to the metadata channel get consumed. You can view metadata
  180. // by running gitleaks with the --debug option set. This is extremely useful when trying to optimize regular
  181. // expressions as that what gitleaks spends most of its cycles on.
  182. func (manager *Manager) receiveMetadata() {
  183. for t := range manager.metadata.timings {
  184. switch ti := t.(type) {
  185. case CloneTime:
  186. manager.metadata.cloneTime += int64(ti)
  187. case ScanTime:
  188. manager.metadata.ScanTime += int64(ti)
  189. case PatchTime:
  190. manager.metadata.patchTime += int64(ti)
  191. case RegexTime:
  192. manager.metadata.RegexTime[ti.Regex] = manager.metadata.RegexTime[ti.Regex] + ti.Time
  193. }
  194. manager.metaWG.Done()
  195. }
  196. }
  197. // IncrementCommits increments total commits during an scan by i.
  198. func (manager *Manager) IncrementCommits(i int) {
  199. manager.metadata.mux.Lock()
  200. manager.metadata.Commits += i
  201. manager.metadata.mux.Unlock()
  202. }
  203. // RecordTime accepts an interface and sends it to the manager's time channel
  204. func (manager *Manager) RecordTime(t interface{}) {
  205. manager.metaWG.Add(1)
  206. manager.metadata.timings <- t
  207. }
  208. // DebugOutput logs metadata and other messages that occurred during a gitleaks scan
  209. func (manager *Manager) DebugOutput() {
  210. log.Debugf("-------------------------\n")
  211. log.Debugf("| Times and Commit Counts|\n")
  212. log.Debugf("-------------------------\n")
  213. fmt.Println("totalScanTime: ", durafmt.Parse(time.Duration(manager.metadata.ScanTime)*time.Nanosecond))
  214. fmt.Println("totalPatchTime: ", durafmt.Parse(time.Duration(manager.metadata.patchTime)*time.Nanosecond))
  215. fmt.Println("totalCloneTime: ", durafmt.Parse(time.Duration(manager.metadata.cloneTime)*time.Nanosecond))
  216. fmt.Println("totalCommits: ", manager.metadata.Commits)
  217. const padding = 6
  218. w := tabwriter.NewWriter(os.Stdout, 0, 0, padding, '.', 0)
  219. log.Debugf("--------------------------\n")
  220. log.Debugf("| Individual Regexes Times |\n")
  221. log.Debugf("--------------------------\n")
  222. for k, v := range manager.metadata.RegexTime {
  223. _, _ = fmt.Fprintf(w, "%s\t%s\n", k, durafmt.Parse(time.Duration(v)*time.Nanosecond))
  224. }
  225. _ = w.Flush()
  226. }
  227. func (manager *Manager) receiveInterrupt() {
  228. <-manager.stopChan
  229. if manager.Opts.Report != "" {
  230. err := manager.Report()
  231. if err != nil {
  232. log.Error(err)
  233. }
  234. }
  235. log.Info("gitleaks received interrupt, stopping scan")
  236. os.Exit(options.ErrorEncountered)
  237. }