Explorar el Código

test(encoding): add KOI8-R encoding tests with a sample XML feed

Frédéric Guillot hace 1 mes
padre
commit
fb7f16ecf2

+ 62 - 0
internal/reader/encoding/encoding_test.go

@@ -403,6 +403,37 @@ func TestCharsetReaderWithUppercaseKOI8RLabel(t *testing.T) {
 	}
 }
 
+// TestCharsetReaderWithKOI8RFeedFixture checks that CharsetReader, when given
+// the "KOI8-R" label, converts the testdata/koi8r.xml fixture into valid UTF-8
+// containing the expected Cyrillic channel title and first item text.
+func TestCharsetReaderWithKOI8RFeedFixture(t *testing.T) {
+	file := "testdata/koi8r.xml"
+
+	f, err := os.Open(file)
+	if err != nil {
+		t.Fatalf("Unable to open file: %v", err)
+	}
+	// Release the file descriptor when the test returns; deferred calls also
+	// run when the test exits through t.Fatalf.
+	defer f.Close()
+
+	reader, err := CharsetReader("KOI8-R", f)
+	if err != nil {
+		t.Fatalf("Unable to create reader: %v", err)
+	}
+
+	data, err := io.ReadAll(reader)
+	if err != nil {
+		t.Fatalf("Unable to read data: %v", err)
+	}
+
+	if !utf8.Valid(data) {
+		t.Fatalf("Data is not valid UTF-8")
+	}
+
+	// Each expected string must appear verbatim in the decoded output; the
+	// test stops at the first one that is missing.
+	expectedStrings := []string{
+		"Пример RSS ленты",
+		"Привет мир! Ёжик, чай, Москва, Санкт-Петербург.",
+	}
+	for _, expected := range expectedStrings {
+		if !bytes.Contains(data, []byte(expected)) {
+			t.Fatalf("Data does not contain expected unicode string: %s", expected)
+		}
+	}
+}
+
 func TestNewCharsetReaderWithKOI8RContentType(t *testing.T) {
 	expectedUnicodeString := "Привет мир"
 
@@ -429,3 +460,34 @@ func TestNewCharsetReaderWithKOI8RContentType(t *testing.T) {
 		t.Fatalf("Data does not match expected unicode string, got %q expected %q", string(data), expectedUnicodeString)
 	}
 }
+
+// TestNewCharsetReaderWithKOI8RFeedFixtureAndContentType checks that
+// NewCharsetReader, when given a Content-Type value declaring charset=KOI8-R,
+// converts the testdata/koi8r.xml fixture into valid UTF-8 containing the
+// expected Cyrillic channel description and second item text.
+func TestNewCharsetReaderWithKOI8RFeedFixtureAndContentType(t *testing.T) {
+	file := "testdata/koi8r.xml"
+
+	f, err := os.Open(file)
+	if err != nil {
+		t.Fatalf("Unable to open file: %v", err)
+	}
+	// Release the file descriptor when the test returns; deferred calls also
+	// run when the test exits through t.Fatalf.
+	defer f.Close()
+
+	reader, err := NewCharsetReader(f, "application/rss+xml; charset=KOI8-R")
+	if err != nil {
+		t.Fatalf("Unable to create reader: %v", err)
+	}
+
+	data, err := io.ReadAll(reader)
+	if err != nil {
+		t.Fatalf("Unable to read data: %v", err)
+	}
+
+	if !utf8.Valid(data) {
+		t.Fatalf("Data is not valid UTF-8")
+	}
+
+	// Each expected string must appear verbatim in the decoded output; the
+	// test stops at the first one that is missing.
+	expectedStrings := []string{
+		"Тестовая лента в кодировке KOI8-R",
+		"Проверка специальных символов",
+	}
+	for _, expected := range expectedStrings {
+		if !bytes.Contains(data, []byte(expected)) {
+			t.Fatalf("Data does not contain expected unicode string: %s", expected)
+		}
+	}
+}

+ 33 - 0
internal/reader/encoding/testdata/koi8r.xml

@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="KOI8-R"?>
+<rss version="2.0">
+  <channel>
+    <title>ðÒÉÍÅÒ RSS ÌÅÎÔÙ</title>
+    <link>http://example.com/</link>
+    <description>ôÅÓÔÏ×ÁÑ ÌÅÎÔÁ × ËÏÄÉÒÏ×ËÅ KOI8-R</description>
+    <language>ru</language>
+    <lastBuildDate>Sun, 15 Feb 2026 12:00:00 +0000</lastBuildDate>
+
+    <item>
+      <title>ðÅÒ×ÁÑ ÎÏ×ÏÓÔØ</title>
+      <link>http://example.com/post1</link>
+      <guid>http://example.com/post1</guid>
+      <pubDate>Sun, 15 Feb 2026 10:00:00 +0000</pubDate>
+      <description>
+        üÔÏ ÔÅÓÔÏ×ÏÅ ÏÐÉÓÁÎÉÅ Ó ÒÕÓÓËÉÍÉ ÓÉÍ×ÏÌÁÍÉ: 
+        ðÒÉ×ÅÔ ÍÉÒ! ³ÖÉË, ÞÁÊ, íÏÓË×Á, óÁÎËÔ-ðÅÔÅÒÂÕÒÇ.
+      </description>
+    </item>
+
+    <item>
+      <title>÷ÔÏÒÁÑ ÎÏ×ÏÓÔØ</title>
+      <link>http://example.com/post2</link>
+      <guid>http://example.com/post2</guid>
+      <pubDate>Sun, 15 Feb 2026 11:00:00 +0000</pubDate>
+      <description>
+        ðÒÏ×ÅÒËÁ ÓÐÅÃÉÁÌØÎÙÈ ÓÉÍ×ÏÌÏ×: &amp; &lt; &gt; 
+        á ÔÁËÖÅ ÞÉÓÌÁ: 1234567890.
+      </description>
+    </item>
+
+  </channel>
+</rss>

+ 56 - 0
internal/reader/xml/decoder_test.go

@@ -78,6 +78,62 @@ func TestXMLDocumentWithISO88591FileEncodingButUTF8Prolog(t *testing.T) {
 	}
 }
 
+// TestXMLDocumentWithKOI8REncoding decodes the testdata/koi8r.xml fixture with
+// NewXMLDecoder and verifies that the channel and item fields come back as the
+// expected Cyrillic text.
+func TestXMLDocumentWithKOI8REncoding(t *testing.T) {
+	const (
+		wantChannelTitle       = "Пример RSS ленты"
+		wantChannelDescription = "Тестовая лента в кодировке KOI8-R"
+		wantItemCount          = 2
+		wantFirstItemTitle     = "Первая новость"
+		wantFirstItemText      = "Привет мир! Ёжик, чай, Москва, Санкт-Петербург."
+		wantSecondItemText     = "Проверка специальных символов"
+	)
+
+	// Minimal RSS shape: only the elements inspected below are mapped.
+	type feedItem struct {
+		Title       string `xml:"title"`
+		Description string `xml:"description"`
+	}
+
+	type feedChannel struct {
+		Title       string     `xml:"title"`
+		Description string     `xml:"description"`
+		Items       []feedItem `xml:"item"`
+	}
+
+	type feedDocument struct {
+		XMLName xml.Name    `xml:"rss"`
+		Channel feedChannel `xml:"channel"`
+	}
+
+	fixture, err := os.Open("testdata/koi8r.xml")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer fixture.Close()
+
+	var parsed feedDocument
+	if err := NewXMLDecoder(fixture).Decode(&parsed); err != nil {
+		t.Fatal(err)
+	}
+
+	feed := parsed.Channel
+
+	if got := feed.Title; got != wantChannelTitle {
+		t.Errorf("Incorrect channel title, expected: %q, got: %q", wantChannelTitle, got)
+	}
+
+	if got := feed.Description; got != wantChannelDescription {
+		t.Errorf("Incorrect channel description, expected: %q, got: %q", wantChannelDescription, got)
+	}
+
+	// Abort here on a count mismatch so the indexing below cannot go out of range.
+	if got := len(feed.Items); got != wantItemCount {
+		t.Fatalf("Incorrect number of items, expected: %d, got: %d", wantItemCount, got)
+	}
+
+	first, second := feed.Items[0], feed.Items[1]
+
+	if got := first.Title; got != wantFirstItemTitle {
+		t.Errorf("Incorrect first item title, expected: %q, got: %q", wantFirstItemTitle, got)
+	}
+
+	if got := first.Description; !strings.Contains(got, wantFirstItemText) {
+		t.Errorf("First item description does not contain expected text, got: %q", got)
+	}
+
+	if got := second.Description; !strings.Contains(got, wantSecondItemText) {
+		t.Errorf("Second item description does not contain expected text, got: %q", got)
+	}
+}
+
 func TestXMLDocumentWithIllegalUnicodeCharacters(t *testing.T) {
 	type myxml struct {
 		XMLName xml.Name `xml:"rss"`

+ 33 - 0
internal/reader/xml/testdata/koi8r.xml

@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="KOI8-R"?>
+<rss version="2.0">
+  <channel>
+    <title>ðÒÉÍÅÒ RSS ÌÅÎÔÙ</title>
+    <link>http://example.com/</link>
+    <description>ôÅÓÔÏ×ÁÑ ÌÅÎÔÁ × ËÏÄÉÒÏ×ËÅ KOI8-R</description>
+    <language>ru</language>
+    <lastBuildDate>Sun, 15 Feb 2026 12:00:00 +0000</lastBuildDate>
+
+    <item>
+      <title>ðÅÒ×ÁÑ ÎÏ×ÏÓÔØ</title>
+      <link>http://example.com/post1</link>
+      <guid>http://example.com/post1</guid>
+      <pubDate>Sun, 15 Feb 2026 10:00:00 +0000</pubDate>
+      <description>
+        üÔÏ ÔÅÓÔÏ×ÏÅ ÏÐÉÓÁÎÉÅ Ó ÒÕÓÓËÉÍÉ ÓÉÍ×ÏÌÁÍÉ: 
+        ðÒÉ×ÅÔ ÍÉÒ! ³ÖÉË, ÞÁÊ, íÏÓË×Á, óÁÎËÔ-ðÅÔÅÒÂÕÒÇ.
+      </description>
+    </item>
+
+    <item>
+      <title>÷ÔÏÒÁÑ ÎÏ×ÏÓÔØ</title>
+      <link>http://example.com/post2</link>
+      <guid>http://example.com/post2</guid>
+      <pubDate>Sun, 15 Feb 2026 11:00:00 +0000</pubDate>
+      <description>
+        ðÒÏ×ÅÒËÁ ÓÐÅÃÉÁÌØÎÙÈ ÓÉÍ×ÏÌÏ×: &amp; &lt; &gt; 
+        á ÔÁËÖÅ ÞÉÓÌÁ: 1234567890.
+      </description>
+    </item>
+
+  </channel>
+</rss>