decoder_test.go 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package xml // import "miniflux.app/v2/internal/reader/xml"
  4. import (
  5. "encoding/xml"
  6. "fmt"
  7. "strings"
  8. "testing"
  9. "unicode/utf8"
  10. )
  11. func TestXMLDocumentWithIllegalUnicodeCharacters(t *testing.T) {
  12. type myxml struct {
  13. XMLName xml.Name `xml:"rss"`
  14. Version string `xml:"version,attr"`
  15. Title string `xml:"title"`
  16. }
  17. expected := "Title & 中文标题"
  18. data := fmt.Sprintf(`<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"><title>Title & 中文%s标题</title></rss>`, "\x10")
  19. reader := strings.NewReader(data)
  20. var x myxml
  21. decoder := NewXMLDecoder(reader)
  22. err := decoder.Decode(&x)
  23. if err != nil {
  24. t.Error(err)
  25. return
  26. }
  27. if x.Title != expected {
  28. t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title)
  29. }
  30. }
  31. func TestXMLDocumentWindows251EncodedWithIllegalCharacters(t *testing.T) {
  32. type myxml struct {
  33. XMLName xml.Name `xml:"rss"`
  34. Version string `xml:"version,attr"`
  35. Title string `xml:"title"`
  36. }
  37. expected := "Title & 中文标题"
  38. data := fmt.Sprintf(`<?xml version="1.0" encoding="windows-1251"?><rss version="2.0"><title>Title & 中文%s标题</title></rss>`, "\x10")
  39. reader := strings.NewReader(data)
  40. var x myxml
  41. decoder := NewXMLDecoder(reader)
  42. err := decoder.Decode(&x)
  43. if err != nil {
  44. t.Error(err)
  45. return
  46. }
  47. if x.Title != expected {
  48. t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title)
  49. }
  50. }
  51. func TestXMLDocumentWithIncorrectEncodingField(t *testing.T) {
  52. type myxml struct {
  53. XMLName xml.Name `xml:"rss"`
  54. Version string `xml:"version,attr"`
  55. Title string `xml:"title"`
  56. }
  57. expected := "Title & 中文标题"
  58. data := fmt.Sprintf(`<?xml version="1.0" encoding="invalid"?><rss version="2.0"><title>Title & 中文%s标题</title></rss>`, "\x10")
  59. reader := strings.NewReader(data)
  60. var x myxml
  61. decoder := NewXMLDecoder(reader)
  62. err := decoder.Decode(&x)
  63. if err != nil {
  64. t.Error(err)
  65. return
  66. }
  67. if x.Title != expected {
  68. t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title)
  69. }
  70. }
  71. func TestFilterValidXMLCharsWithInvalidUTF8Sequence(t *testing.T) {
  72. // Create input with invalid UTF-8 sequence
  73. input := []byte{0x41, 0xC0, 0xAF, 0x42} // 'A', invalid UTF-8, 'B'
  74. filtered := filterValidXMLChars(input)
  75. // The function would replace invalid UTF-8 with replacement char
  76. // rather than properly filtering
  77. if utf8.Valid(filtered) {
  78. r, _ := utf8.DecodeRune(filtered[1:])
  79. if r == utf8.RuneError {
  80. t.Error("Invalid UTF-8 was not properly filtered")
  81. }
  82. }
  83. }
  84. func FuzzFilterValidXMLChars(f *testing.F) {
  85. f.Fuzz(func(t *testing.T, s []byte) {
  86. filterValidXMLChars(s)
  87. })
  88. }