encodings.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. package codec
  2. import (
  3. "fmt"
  4. "math"
  5. "regexp"
  6. "strings"
  7. )
  8. var (
  9. // encodingsRe is a regex built by combining all the encoding patterns
  10. // into named capture groups so that a single pass can detect multiple
  11. // encodings
  12. encodingsRe *regexp.Regexp
  13. // encodings contains all the encoding configurations for the detector.
  14. // The precedence is important. You want more specific encodings to
  15. // have a higher precedence or encodings that partially encode the
  16. // values (e.g. percent) unlike encodings that fully encode the string
  17. // (e.g. base64). If two encoding matches overlap the decoder will use
  18. // this order to determine which encoding should wait till the next pass.
  19. encodings = []*encoding{
  20. &encoding{
  21. kind: percentKind,
  22. pattern: `%[0-9A-Fa-f]{2}(?:.*%[0-9A-Fa-f]{2})?`,
  23. decode: decodePercent,
  24. },
  25. &encoding{
  26. kind: hexKind,
  27. pattern: `[0-9A-Fa-f]{32,}`,
  28. decode: decodeHex,
  29. },
  30. &encoding{
  31. kind: base64Kind,
  32. pattern: `[\w\/+-]{16,}={0,2}`,
  33. decode: decodeBase64,
  34. },
  35. }
  36. )
  37. // encodingNames is used to map the encodingKinds to their name
  38. var encodingNames = []string{
  39. "percent",
  40. "hex",
  41. "base64",
  42. }
  43. // encodingKind can be or'd together to capture all of the unique encodings
  44. // that were present in a segment
  45. type encodingKind int
  46. var (
  47. // make sure these go up by powers of 2
  48. percentKind = encodingKind(1)
  49. hexKind = encodingKind(2)
  50. base64Kind = encodingKind(4)
  51. )
  52. func (e encodingKind) String() string {
  53. i := int(math.Log2(float64(e)))
  54. if i >= len(encodingNames) {
  55. return ""
  56. }
  57. return encodingNames[i]
  58. }
  59. // kinds returns a list of encodingKinds combined in this one
  60. func (e encodingKind) kinds() []encodingKind {
  61. kinds := []encodingKind{}
  62. for i := 0; i < len(encodingNames); i++ {
  63. if kind := int(e) & int(math.Pow(2, float64(i))); kind != 0 {
  64. kinds = append(kinds, encodingKind(kind))
  65. }
  66. }
  67. return kinds
  68. }
// encodingMatch represents a match of an encoding in the text: which
// encoding matched and the span that the match covers.
type encodingMatch struct {
	// the encoding whose capture group produced this match
	encoding *encoding
	// embedded span of the match; findEncodingMatches fills start and end
	// from the regexp submatch indices
	startEnd
}
// encoding represents a type of encoding supported by the decoder.
type encoding struct {
	// the kind of decoding (e.g. base64, etc)
	kind encodingKind
	// the regex pattern that matches the encoding format; init wraps it in a
	// capture group named after kind
	pattern string
	// take the match and return the decoded value
	decode func(string) string
	// determine which encoding should win out when two overlap; assigned in
	// init from the position in encodings, so earlier entries get larger
	// values
	precedence int
}
  85. func init() {
  86. count := len(encodings)
  87. namedPatterns := make([]string, count)
  88. for i, encoding := range encodings {
  89. encoding.precedence = count - i
  90. namedPatterns[i] = fmt.Sprintf(
  91. "(?P<%s>%s)",
  92. encoding.kind,
  93. encoding.pattern,
  94. )
  95. }
  96. encodingsRe = regexp.MustCompile(strings.Join(namedPatterns, "|"))
  97. }
  98. // findEncodingMatches finds as many encodings as it can for this pass
  99. func findEncodingMatches(data string) []encodingMatch {
  100. var all []encodingMatch
  101. for _, matchIndex := range encodingsRe.FindAllStringSubmatchIndex(data, -1) {
  102. // Add the encodingMatch with its proper encoding
  103. for i, j := 2, 0; i < len(matchIndex); i, j = i+2, j+1 {
  104. if matchIndex[i] > -1 {
  105. all = append(all, encodingMatch{
  106. encoding: encodings[j],
  107. startEnd: startEnd{
  108. start: matchIndex[i],
  109. end: matchIndex[i+1],
  110. },
  111. })
  112. }
  113. }
  114. }
  115. totalMatches := len(all)
  116. if totalMatches == 1 {
  117. return all
  118. }
  119. // filter out lower precedence ones that overlap their neigbors
  120. filtered := make([]encodingMatch, 0, len(all))
  121. for i, m := range all {
  122. if i > 0 {
  123. prev := all[i-1]
  124. if m.overlaps(prev.startEnd) && prev.encoding.precedence > m.encoding.precedence {
  125. continue // skip this one
  126. }
  127. }
  128. if i+1 < totalMatches {
  129. next := all[i+1]
  130. if m.overlaps(next.startEnd) && next.encoding.precedence > m.encoding.precedence {
  131. continue // skip this one
  132. }
  133. }
  134. filtered = append(filtered, m)
  135. }
  136. return filtered
  137. }