| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173 |
- // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
- // SPDX-License-Identifier: Apache-2.0
- package xml // import "miniflux.app/v2/internal/reader/xml"
- import (
- "encoding/xml"
- "fmt"
- "os"
- "strings"
- "testing"
- "unicode/utf8"
- )
- func TestXMLDocumentWithISO88591Encoding(t *testing.T) {
- fp, err := os.Open("testdata/iso88591.xml")
- if err != nil {
- t.Fatal(err)
- }
- defer fp.Close()
- type myXMLDocument struct {
- XMLName xml.Name `xml:"note"`
- To string `xml:"to"`
- From string `xml:"from"`
- }
- var doc myXMLDocument
- decoder := NewXMLDecoder(fp)
- err = decoder.Decode(&doc)
- if err != nil {
- t.Fatal(err)
- }
- expectedTo := "Anaïs"
- expectedFrom := "Jürgen"
- if doc.To != expectedTo {
- t.Errorf(`Incorrect "to" field, expected: %q, got: %q`, expectedTo, doc.To)
- }
- if doc.From != expectedFrom {
- t.Errorf(`Incorrect "from" field, expected: %q, got: %q`, expectedFrom, doc.From)
- }
- }
- func TestXMLDocumentWithISO88591FileEncodingButUTF8Prolog(t *testing.T) {
- fp, err := os.Open("testdata/iso88591_utf8_mismatch.xml")
- if err != nil {
- t.Fatal(err)
- }
- defer fp.Close()
- type myXMLDocument struct {
- XMLName xml.Name `xml:"note"`
- To string `xml:"to"`
- From string `xml:"from"`
- }
- var doc myXMLDocument
- decoder := NewXMLDecoder(fp)
- err = decoder.Decode(&doc)
- if err != nil {
- t.Fatal(err)
- }
- // TODO: detect actual encoding from bytes if not UTF-8 and convert to UTF-8 if needed.
- // For now we just expect the invalid characters to be stripped out.
- expectedTo := "Anas"
- expectedFrom := "Jrgen"
- if doc.To != expectedTo {
- t.Errorf(`Incorrect "to" field, expected: %q, got: %q`, expectedTo, doc.To)
- }
- if doc.From != expectedFrom {
- t.Errorf(`Incorrect "from" field, expected: %q, got: %q`, expectedFrom, doc.From)
- }
- }
- func TestXMLDocumentWithIllegalUnicodeCharacters(t *testing.T) {
- type myxml struct {
- XMLName xml.Name `xml:"rss"`
- Version string `xml:"version,attr"`
- Title string `xml:"title"`
- }
- expected := "Title & 中文标题"
- data := fmt.Sprintf(`<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"><title>Title & 中文%s标题</title></rss>`, "\x10")
- reader := strings.NewReader(data)
- var x myxml
- decoder := NewXMLDecoder(reader)
- err := decoder.Decode(&x)
- if err != nil {
- t.Error(err)
- return
- }
- if x.Title != expected {
- t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title)
- }
- }
- func TestXMLDocumentWindows251EncodedWithIllegalCharacters(t *testing.T) {
- type myxml struct {
- XMLName xml.Name `xml:"rss"`
- Version string `xml:"version,attr"`
- Title string `xml:"title"`
- }
- expected := "Title & 中文标题"
- data := fmt.Sprintf(`<?xml version="1.0" encoding="windows-1251"?><rss version="2.0"><title>Title & 中文%s标题</title></rss>`, "\x10")
- reader := strings.NewReader(data)
- var x myxml
- decoder := NewXMLDecoder(reader)
- err := decoder.Decode(&x)
- if err != nil {
- t.Error(err)
- return
- }
- if x.Title != expected {
- t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title)
- }
- }
- func TestXMLDocumentWithIncorrectEncodingField(t *testing.T) {
- type myxml struct {
- XMLName xml.Name `xml:"rss"`
- Version string `xml:"version,attr"`
- Title string `xml:"title"`
- }
- expected := "Title & 中文标题"
- data := fmt.Sprintf(`<?xml version="1.0" encoding="invalid"?><rss version="2.0"><title>Title & 中文%s标题</title></rss>`, "\x10")
- reader := strings.NewReader(data)
- var x myxml
- decoder := NewXMLDecoder(reader)
- err := decoder.Decode(&x)
- if err != nil {
- t.Error(err)
- return
- }
- if x.Title != expected {
- t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title)
- }
- }
- func TestFilterValidXMLCharsWithInvalidUTF8Sequence(t *testing.T) {
- // Create input with invalid UTF-8 sequence
- input := []byte{0x41, 0xC0, 0xAF, 0x42} // 'A', invalid UTF-8, 'B'
- filtered := filterValidXMLChars(input)
- // The function would replace invalid UTF-8 with replacement char
- // rather than properly filtering
- if utf8.Valid(filtered) {
- r, _ := utf8.DecodeRune(filtered[1:])
- if r == utf8.RuneError {
- t.Error("Invalid UTF-8 was not properly filtered")
- }
- }
- }
- func FuzzFilterValidXMLChars(f *testing.F) {
- f.Fuzz(func(t *testing.T, s []byte) {
- filterValidXMLChars(s)
- })
- }
|