generate.go 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
// == WARNING ==
// These functions are used to generate Gitleaks' default config.
// You are free to use them in your own project; HOWEVER, no API stability is guaranteed.
  4. package utils
  5. import (
  6. "fmt"
  7. "strings"
  8. "github.com/zricethezav/gitleaks/v8/regexp"
  9. )
// Regex fragments that the generator functions in this file concatenate into
// complete detection patterns.
const (
	// case insensitive prefix
	// Placed at the very start of a pattern, the `(?i)` flag makes everything
	// that follows match case-insensitively.
	caseInsensitive = `(?i)`

	// identifier prefix (just an ignore group)
	// Lazily skips up to 50 word, dot or dash characters, then opens a
	// non-capturing group that is case-insensitive on its own (`(?i:`).
	identifierCaseInsensitivePrefix = `[\w.-]{0,50}?(?i:`
	// Closes the group opened by identifierCaseInsensitivePrefix.
	identifierCaseInsensitiveSuffix = `)`
	// Same leading skip as above, but opens a plain non-capturing group that
	// is meant to hold the `|`-separated identifiers.
	identifierPrefix = `[\w.-]{0,50}?(?:`
	// Closes the identifier group, then tolerates up to 20 trailing
	// space/tab/word/dot/dash characters followed by up to 3 whitespace or
	// quote characters before the operator.
	identifierSuffix = `)(?:[ \t\w.-]{0,20})[\s'"]{0,3}`

	// commonly used assignment operators or function call
	// Alternatives: `=`, `>`, `:=` / `::=` / `:::=`, `||`, `:`, `=>`, `?=` and `,`.
	//language=regexp
	operator = `(?:=|>|:{1,3}=|\|\||:|=>|\?=|,)`

	// boundaries for the secret
	// Word boundary followed by the opening of the capture group for the secret.
	secretPrefixUnique = `\b(`
	// Up to 5 backticks (\x60), quotes, whitespace or `=` characters, followed
	// by the opening of the capture group for the secret.
	secretPrefix = `[\x60'"\s=]{0,5}(`
	// Closes the capture group; the secret must then be followed by a
	// backtick, quote, whitespace or semicolon, by a backslash followed by
	// `n` or `r` (an escaped line break), or by `$`.
	secretSuffix = `)(?:[\x60'"\s;]|\\[nr]|$)`
)
  26. func GenerateSemiGenericRegex(identifiers []string, secretRegex string, isCaseInsensitive bool) *regexp.Regexp {
  27. var sb strings.Builder
  28. // The identifiers should always be case-insensitive.
  29. // This is inelegant but prevents an extraneous `(?i:)` from being added to the pattern; it could be removed.
  30. if isCaseInsensitive {
  31. sb.WriteString(caseInsensitive)
  32. writeIdentifiers(&sb, identifiers)
  33. } else {
  34. sb.WriteString(identifierCaseInsensitivePrefix)
  35. writeIdentifiers(&sb, identifiers)
  36. sb.WriteString(identifierCaseInsensitiveSuffix)
  37. }
  38. sb.WriteString(operator)
  39. sb.WriteString(secretPrefix)
  40. sb.WriteString(secretRegex)
  41. sb.WriteString(secretSuffix)
  42. return regexp.MustCompile(sb.String())
  43. }
  44. func MergeRegexps(regexps ...*regexp.Regexp) *regexp.Regexp {
  45. patterns := make([]string, len(regexps))
  46. for i, r := range regexps {
  47. patterns[i] = r.String()
  48. }
  49. return regexp.MustCompile(strings.Join(patterns, "|"))
  50. }
  51. func writeIdentifiers(sb *strings.Builder, identifiers []string) {
  52. sb.WriteString(identifierPrefix)
  53. sb.WriteString(strings.Join(identifiers, "|"))
  54. sb.WriteString(identifierSuffix)
  55. }
  56. func GenerateUniqueTokenRegex(secretRegex string, isCaseInsensitive bool) *regexp.Regexp {
  57. var sb strings.Builder
  58. if isCaseInsensitive {
  59. sb.WriteString(caseInsensitive)
  60. }
  61. sb.WriteString(secretPrefixUnique)
  62. sb.WriteString(secretRegex)
  63. sb.WriteString(secretSuffix)
  64. return regexp.MustCompile(sb.String())
  65. }
  66. func GenerateSampleSecret(identifier string, secret string) string {
  67. return fmt.Sprintf("%s_api_token = \"%s\"", identifier, secret)
  68. }
  69. // See: https://github.com/gitleaks/gitleaks/issues/1222
  70. func GenerateSampleSecrets(identifier string, secret string) []string {
  71. samples := map[string]string{
  72. // Configuration
  73. // INI
  74. "ini - quoted1": "{i}Token=\"{s}\"",
  75. "ini - quoted2": "{i}Token = \"{s}\"",
  76. "ini - unquoted1": "{i}Token={s}",
  77. "ini - unquoted2": "{i}Token = {s}",
  78. // JSON
  79. "json - string": "{\n \"{i}_token\": \"{s}\"\n}",
  80. // TODO: "json - escaped string": "\\{\n \\\"{i}_token\\\": \\\"{s}\\\"\n\\}",
  81. // TODO: "json - string key/value": "{\n \"name\": \"{i}_token\",\n \"value\": \"{s}\"\n}",
  82. "json - escaped newline in string": `{"config.ini": "{I}_TOKEN={s}\nBACKUP_ENABLED=true"}`,
  83. // XML
  84. // TODO: "xml - element": "<{i}Token>{s}</{i}Token>",
  85. "xml - element multiline": "<{i}Token>\n {s}\n</{i}Token>",
  86. // TODO: "xml - attribute": "<entry name=\"{i}Token\" value=\"{s}\" />",
  87. // TODO: "xml - key/value elements": "<entry>\n <name=\"{i}Token\" />\n <value=\"{s}\" />\n</entry>",
  88. // YAML
  89. "yaml - singleline - unquoted": "{i}_token: {s}",
  90. "yaml - singleline - single quote": "{i}_token: '{s}'",
  91. "yaml - singleline - double quote": "{i}_token: \"{s}\"",
  92. // TODO: "yaml - multiline - literal": "{i}_token: |\n {s}",
  93. // TODO: "yaml - multiline - folding": "{i}_token: >\n {s}",
  94. // "": "",
  95. // Programming Languages
  96. "C#": `string {i}Token = "{s}";`,
  97. "go - normal": `var {i}Token string = "{s}"`,
  98. "go - short": `{i}Token := "{s}"`,
  99. "go - backticks": "{i}Token := `{s}`",
  100. "java": "String {i}Token = \"{s}\";",
  101. // TODO: "java - escaped quotes": `config.put("sasl.jaas.config", "org.apache.kafka.common.security.plain.PlainLoginModule required username=\"JDOE35\" {i}Token=\"{s}\""`,
  102. // TODO:"kotlin - type": "var {i}Token: string = \"{s}\"",
  103. "kotlin - notype": "var {i}Token = \"{s}\"",
  104. "php - string concat": `${i}Token .= "{s}"`,
  105. // TODO: "php - null coalesce": `${i}Token ??= "{s}"`,
  106. "python - single quote": "{i}Token = '{s}'",
  107. "python - double quote": `{i}Token = "{s}"`,
  108. // "": "",
  109. // Miscellaneous
  110. // TODO: "url - basic auth": `https://{i}:{s}@example.com/`,
  111. // TODO: "url - query parameter": "https://example.com?{i}Token={s}&fooBar=baz",
  112. // TODO: "comment - slash": "//{s} is the password",
  113. // TODO: "comment - slash multiline": "/*{s} is the password",
  114. // TODO: "comment - hashtag": "#{s} is the password",
  115. // TODO: "comment - semicolon": ";{s} is the password",
  116. // TODO: "csv - unquoted": `{i}Token,{s},`,
  117. "misc - comma operator": `System.setProperty("{I}_TOKEN", "{s}")`,
  118. // TODO: "misc - comma suffix": `environmentVariables = {PATH=/usr/local/bin, ENV=/etc/bashrc, {I}_PASSWORD={s}, LC_CTYPE=en_CA.UTF-8},`, // Spotted in `./Library/Logs/gradle-kotlin-dsl-resolver-xxxx.log`
  119. "logstash": " \"{i}Token\" => \"{s}\"",
  120. // TODO: "sql - tabular": "|{s}|",
  121. // TODO: "sql": "",
  122. // Makefile
  123. // See: https://github.com/gitleaks/gitleaks/pull/1191
  124. "make - recursive assignment": "{i}_TOKEN = \"{s}\"",
  125. "make - simple assignment": "{i}_TOKEN := \"{s}\"",
  126. "make - shell assignment": "{i}_TOKEN ::= \"{s}\"",
  127. "make - evaluated shell assignment": "{i}_TOKEN :::= \"{s}\"",
  128. "make - conditional assignment": "{i}_TOKEN ?= \"{s}\"",
  129. // TODO: "make - append": "{i}_TOKEN += \"{s}\"",
  130. // "": "",
  131. }
  132. replacer := strings.NewReplacer(
  133. "{i}", identifier,
  134. "{I}", strings.ToUpper(identifier),
  135. "{s}", secret,
  136. )
  137. cases := make([]string, 0, len(samples))
  138. for _, v := range samples {
  139. cases = append(cases, replacer.Replace(v))
  140. }
  141. return cases
  142. }