4
0
Эх сурвалжийг харах

test(encoding): add unit tests for CharsetReader function

Frédéric Guillot 1 жил өмнө
parent
commit
af1f966250

+ 149 - 0
internal/reader/encoding/encoding_test.go

@@ -0,0 +1,149 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package encoding // import "miniflux.app/v2/internal/reader/encoding"
+
+import (
+	"io"
+	"os"
+	"testing"
+	"unicode/utf8"
+)
+
+// TestCharsetReaderWithUTF8 feeds testdata/utf8.xml to CharsetReader with the
+// "UTF-8" charset label and requires the bytes read back to be valid UTF-8.
+func TestCharsetReaderWithUTF8(t *testing.T) {
+	f, err := os.Open("testdata/utf8.xml")
+	if err != nil {
+		t.Fatalf("Unable to open file: %v", err)
+	}
+	defer f.Close() // release the fixture's file handle when the test returns
+
+	reader, err := CharsetReader("UTF-8", f)
+	if err != nil {
+		t.Fatalf("Unable to create reader: %v", err)
+	}
+
+	data, err := io.ReadAll(reader)
+	if err != nil {
+		t.Fatalf("Unable to read data: %v", err)
+	}
+	if !utf8.Valid(data) {
+		t.Fatalf("Data is not valid UTF-8")
+	}
+}
+
+// TestCharsetReaderWithISO88591 feeds testdata/iso-8859-1.xml to CharsetReader with
+// the "ISO-8859-1" charset label and requires the bytes read back to be valid UTF-8.
+func TestCharsetReaderWithISO88591(t *testing.T) {
+	f, err := os.Open("testdata/iso-8859-1.xml")
+	if err != nil {
+		t.Fatalf("Unable to open file: %v", err)
+	}
+	defer f.Close() // release the fixture's file handle when the test returns
+
+	reader, err := CharsetReader("ISO-8859-1", f)
+	if err != nil {
+		t.Fatalf("Unable to create reader: %v", err)
+	}
+
+	data, err := io.ReadAll(reader)
+	if err != nil {
+		t.Fatalf("Unable to read data: %v", err)
+	}
+	if !utf8.Valid(data) {
+		t.Fatalf("Data is not valid UTF-8")
+	}
+}
+
+// TestCharsetReaderWithWindows1252 feeds testdata/windows-1252.xml to CharsetReader with
+// the "windows-1252" charset label and requires the bytes read back to be valid UTF-8.
+func TestCharsetReaderWithWindows1252(t *testing.T) {
+	f, err := os.Open("testdata/windows-1252.xml")
+	if err != nil {
+		t.Fatalf("Unable to open file: %v", err)
+	}
+	defer f.Close() // release the fixture's file handle when the test returns
+
+	reader, err := CharsetReader("windows-1252", f)
+	if err != nil {
+		t.Fatalf("Unable to create reader: %v", err)
+	}
+
+	data, err := io.ReadAll(reader)
+	if err != nil {
+		t.Fatalf("Unable to read data: %v", err)
+	}
+	if !utf8.Valid(data) {
+		t.Fatalf("Data is not valid UTF-8")
+	}
+}
+
+// TestCharsetReaderWithInvalidProlog passes the unrecognized-looking label "invalid" for
+// testdata/invalid-prolog.xml and requires no error plus valid UTF-8 output.
+func TestCharsetReaderWithInvalidProlog(t *testing.T) {
+	f, err := os.Open("testdata/invalid-prolog.xml")
+	if err != nil {
+		t.Fatalf("Unable to open file: %v", err)
+	}
+	defer f.Close() // release the fixture's file handle when the test returns
+
+	reader, err := CharsetReader("invalid", f)
+	if err != nil {
+		t.Fatalf("Unable to create reader: %v", err)
+	}
+
+	data, err := io.ReadAll(reader)
+	if err != nil {
+		t.Fatalf("Unable to read data: %v", err)
+	}
+	if !utf8.Valid(data) {
+		t.Fatalf("Data is not valid UTF-8")
+	}
+}
+
+// TestCharsetReaderWithUTF8DocumentWithIncorrectProlog passes the "ISO-8859-1" label for
+// testdata/utf8-incorrect-prolog.xml and still requires the output to be valid UTF-8.
+func TestCharsetReaderWithUTF8DocumentWithIncorrectProlog(t *testing.T) {
+	f, err := os.Open("testdata/utf8-incorrect-prolog.xml")
+	if err != nil {
+		t.Fatalf("Unable to open file: %v", err)
+	}
+	defer f.Close() // release the fixture's file handle when the test returns
+
+	reader, err := CharsetReader("ISO-8859-1", f)
+	if err != nil {
+		t.Fatalf("Unable to create reader: %v", err)
+	}
+
+	data, err := io.ReadAll(reader)
+	if err != nil {
+		t.Fatalf("Unable to read data: %v", err)
+	}
+	if !utf8.Valid(data) {
+		t.Fatalf("Data is not valid UTF-8")
+	}
+}
+
+// TestCharsetReaderWithWindows1252DocumentWithIncorrectProlog reads
+// testdata/windows-1252-incorrect-prolog.xml under the "windows-1252" label and requires valid UTF-8 output.
+func TestCharsetReaderWithWindows1252DocumentWithIncorrectProlog(t *testing.T) {
+	f, err := os.Open("testdata/windows-1252-incorrect-prolog.xml")
+	if err != nil {
+		t.Fatalf("Unable to open file: %v", err)
+	}
+	defer f.Close() // release the fixture's file handle when the test returns
+
+	reader, err := CharsetReader("windows-1252", f)
+	if err != nil {
+		t.Fatalf("Unable to create reader: %v", err)
+	}
+
+	data, err := io.ReadAll(reader)
+	if err != nil {
+		t.Fatalf("Unable to read data: %v", err)
+	}
+	if !utf8.Valid(data) {
+		t.Fatalf("Data is not valid UTF-8")
+	}
+}

+ 7 - 0
internal/reader/encoding/testdata/invalid-prolog.xml

@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="invalid"?>
+<feed>
+    <title>테스트 피드</title>
+    <entry>
+        <title>こんにちは世界</title>
+    </entry>
+</feed>

+ 7 - 0
internal/reader/encoding/testdata/iso-8859-1.xml

@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<rss version="2.0">
+    <channel>
+        <title>Café</title>
+        <description>Présentation</description>
+    </channel>
+</rss>

+ 7 - 0
internal/reader/encoding/testdata/utf8-incorrect-prolog.xml

@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<feed>
+    <title>테스트 피드</title>
+    <entry>
+        <title>こんにちは世界</title>
+    </entry>
+</feed>

+ 7 - 0
internal/reader/encoding/testdata/utf8.xml

@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="utf-8"?>
+<feed>
+    <title>테스트 피드</title>
+    <entry>
+        <title>こんにちは世界</title>
+    </entry>
+</feed>

+ 6 - 0
internal/reader/encoding/testdata/windows-1252-incorrect-prolog.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="windows-1252"?>
+<rss version="2.0">
+    <channel>
+        <title>Euro €</title>
+    </channel>
+</rss>

+ 6 - 0
internal/reader/encoding/testdata/windows-1252.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="windows-1252"?>
+<rss version="2.0">
+    <channel>
+        <title>Euro €</title>
+    </channel>
+</rss>