4
0

decoder.go 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. package codec
  2. import (
  3. "bytes"
  4. "github.com/zricethezav/gitleaks/v8/logging"
  5. )
  6. // Decoder decodes various types of data in place
  7. type Decoder struct {
  8. decodedMap map[string]string
  9. }
  10. // NewDecoder creates a default decoder struct
  11. func NewDecoder() *Decoder {
  12. return &Decoder{
  13. decodedMap: make(map[string]string),
  14. }
  15. }
  16. // Decode returns the data with the values decoded in place along with the
  17. // encoded segment meta data for the next pass of decoding
  18. func (d *Decoder) Decode(data string, predecessors []*EncodedSegment) (string, []*EncodedSegment) {
  19. segments := d.findEncodedSegments(data, predecessors)
  20. if len(segments) > 0 {
  21. result := bytes.NewBuffer(make([]byte, 0, len(data)))
  22. encodedStart := 0
  23. for _, segment := range segments {
  24. result.WriteString(data[encodedStart:segment.encoded.start])
  25. result.WriteString(segment.decodedValue)
  26. encodedStart = segment.encoded.end
  27. }
  28. result.WriteString(data[encodedStart:])
  29. return result.String(), segments
  30. }
  31. return data, segments
  32. }
  33. // findEncodedSegments finds the encoded segments in the data
  34. func (d *Decoder) findEncodedSegments(data string, predecessors []*EncodedSegment) []*EncodedSegment {
  35. if len(data) == 0 {
  36. return []*EncodedSegment{}
  37. }
  38. decodedShift := 0
  39. encodingMatches := findEncodingMatches(data)
  40. segments := make([]*EncodedSegment, 0, len(encodingMatches))
  41. for _, m := range encodingMatches {
  42. encodedValue := data[m.start:m.end]
  43. decodedValue, alreadyDecoded := d.decodedMap[encodedValue]
  44. if !alreadyDecoded {
  45. decodedValue = m.encoding.decode(encodedValue)
  46. d.decodedMap[encodedValue] = decodedValue
  47. }
  48. if len(decodedValue) == 0 {
  49. continue
  50. }
  51. segment := &EncodedSegment{
  52. predecessors: predecessors,
  53. original: toOriginal(predecessors, m.startEnd),
  54. encoded: m.startEnd,
  55. decoded: startEnd{
  56. m.start + decodedShift,
  57. m.start + decodedShift + len(decodedValue),
  58. },
  59. decodedValue: decodedValue,
  60. encodings: m.encoding.kind,
  61. depth: 1,
  62. }
  63. // Shift decoded start and ends based on size changes
  64. decodedShift += len(decodedValue) - len(encodedValue)
  65. // Adjust depth and encoding if applicable
  66. if len(segment.predecessors) != 0 {
  67. // Set the depth based on the predecessors' depth in the previous pass
  68. segment.depth = 1 + segment.predecessors[0].depth
  69. // Adjust encodings
  70. for _, p := range segment.predecessors {
  71. if segment.encoded.overlaps(p.decoded) {
  72. segment.encodings |= p.encodings
  73. }
  74. }
  75. }
  76. segments = append(segments, segment)
  77. logging.Debug().
  78. Str("decoder", m.encoding.kind.String()).
  79. Msgf(
  80. "segment found: original=%s pos=%s: %q -> %q",
  81. segment.original,
  82. segment.encoded,
  83. encodedValue,
  84. segment.decodedValue,
  85. )
  86. }
  87. return segments
  88. }