decoder_test.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package xml // import "miniflux.app/v2/internal/reader/xml"
  4. import (
  5. "encoding/xml"
  6. "fmt"
  7. "os"
  8. "strings"
  9. "testing"
  10. "unicode/utf8"
  11. )
  12. func TestXMLDocumentWithISO88591Encoding(t *testing.T) {
  13. fp, err := os.Open("testdata/iso88591.xml")
  14. if err != nil {
  15. t.Fatal(err)
  16. }
  17. defer fp.Close()
  18. type myXMLDocument struct {
  19. XMLName xml.Name `xml:"note"`
  20. To string `xml:"to"`
  21. From string `xml:"from"`
  22. }
  23. var doc myXMLDocument
  24. decoder := NewXMLDecoder(fp)
  25. err = decoder.Decode(&doc)
  26. if err != nil {
  27. t.Fatal(err)
  28. }
  29. expectedTo := "Anaïs"
  30. expectedFrom := "Jürgen"
  31. if doc.To != expectedTo {
  32. t.Errorf(`Incorrect "to" field, expected: %q, got: %q`, expectedTo, doc.To)
  33. }
  34. if doc.From != expectedFrom {
  35. t.Errorf(`Incorrect "from" field, expected: %q, got: %q`, expectedFrom, doc.From)
  36. }
  37. }
  38. func TestXMLDocumentWithISO88591FileEncodingButUTF8Prolog(t *testing.T) {
  39. fp, err := os.Open("testdata/iso88591_utf8_mismatch.xml")
  40. if err != nil {
  41. t.Fatal(err)
  42. }
  43. defer fp.Close()
  44. type myXMLDocument struct {
  45. XMLName xml.Name `xml:"note"`
  46. To string `xml:"to"`
  47. From string `xml:"from"`
  48. }
  49. var doc myXMLDocument
  50. decoder := NewXMLDecoder(fp)
  51. err = decoder.Decode(&doc)
  52. if err != nil {
  53. t.Fatal(err)
  54. }
  55. // TODO: detect actual encoding from bytes if not UTF-8 and convert to UTF-8 if needed.
  56. // For now we just expect the invalid characters to be stripped out.
  57. expectedTo := "Anas"
  58. expectedFrom := "Jrgen"
  59. if doc.To != expectedTo {
  60. t.Errorf(`Incorrect "to" field, expected: %q, got: %q`, expectedTo, doc.To)
  61. }
  62. if doc.From != expectedFrom {
  63. t.Errorf(`Incorrect "from" field, expected: %q, got: %q`, expectedFrom, doc.From)
  64. }
  65. }
  66. func TestXMLDocumentWithIllegalUnicodeCharacters(t *testing.T) {
  67. type myxml struct {
  68. XMLName xml.Name `xml:"rss"`
  69. Version string `xml:"version,attr"`
  70. Title string `xml:"title"`
  71. }
  72. expected := "Title & 中文标题"
  73. data := fmt.Sprintf(`<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"><title>Title & 中文%s标题</title></rss>`, "\x10")
  74. reader := strings.NewReader(data)
  75. var x myxml
  76. decoder := NewXMLDecoder(reader)
  77. err := decoder.Decode(&x)
  78. if err != nil {
  79. t.Error(err)
  80. return
  81. }
  82. if x.Title != expected {
  83. t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title)
  84. }
  85. }
  86. func TestXMLDocumentWindows251EncodedWithIllegalCharacters(t *testing.T) {
  87. type myxml struct {
  88. XMLName xml.Name `xml:"rss"`
  89. Version string `xml:"version,attr"`
  90. Title string `xml:"title"`
  91. }
  92. expected := "Title & 中文标题"
  93. data := fmt.Sprintf(`<?xml version="1.0" encoding="windows-1251"?><rss version="2.0"><title>Title & 中文%s标题</title></rss>`, "\x10")
  94. reader := strings.NewReader(data)
  95. var x myxml
  96. decoder := NewXMLDecoder(reader)
  97. err := decoder.Decode(&x)
  98. if err != nil {
  99. t.Error(err)
  100. return
  101. }
  102. if x.Title != expected {
  103. t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title)
  104. }
  105. }
  106. func TestXMLDocumentWithIncorrectEncodingField(t *testing.T) {
  107. type myxml struct {
  108. XMLName xml.Name `xml:"rss"`
  109. Version string `xml:"version,attr"`
  110. Title string `xml:"title"`
  111. }
  112. expected := "Title & 中文标题"
  113. data := fmt.Sprintf(`<?xml version="1.0" encoding="invalid"?><rss version="2.0"><title>Title & 中文%s标题</title></rss>`, "\x10")
  114. reader := strings.NewReader(data)
  115. var x myxml
  116. decoder := NewXMLDecoder(reader)
  117. err := decoder.Decode(&x)
  118. if err != nil {
  119. t.Error(err)
  120. return
  121. }
  122. if x.Title != expected {
  123. t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title)
  124. }
  125. }
  126. func TestFilterValidXMLCharsWithInvalidUTF8Sequence(t *testing.T) {
  127. // Create input with invalid UTF-8 sequence
  128. input := []byte{0x41, 0xC0, 0xAF, 0x42} // 'A', invalid UTF-8, 'B'
  129. filtered := filterValidXMLChars(input)
  130. // The function would replace invalid UTF-8 with replacement char
  131. // rather than properly filtering
  132. if utf8.Valid(filtered) {
  133. r, _ := utf8.DecodeRune(filtered[1:])
  134. if r == utf8.RuneError {
  135. t.Error("Invalid UTF-8 was not properly filtered")
  136. }
  137. }
  138. }
  139. func FuzzFilterValidXMLChars(f *testing.F) {
  140. f.Fuzz(func(t *testing.T, s []byte) {
  141. filterValidXMLChars(s)
  142. })
  143. }