| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493 |
- // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
- // SPDX-License-Identifier: Apache-2.0
- package encoding // import "miniflux.app/v2/internal/reader/encoding"
- import (
- "bytes"
- "io"
- "os"
- "testing"
- "unicode/utf8"
- "golang.org/x/text/encoding/charmap"
- )
- func TestCharsetReaderWithUTF8(t *testing.T) {
- file := "testdata/utf8.xml"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := CharsetReader("UTF-8", f)
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- expectedUnicodeString := "Café"
- if !bytes.Contains(data, []byte(expectedUnicodeString)) {
- t.Fatalf("Data does not contain expected unicode string: %s", expectedUnicodeString)
- }
- }
- func TestCharsetReaderWithISO88591(t *testing.T) {
- file := "testdata/iso-8859-1.xml"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := CharsetReader("ISO-8859-1", f)
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- expectedUnicodeString := "Café"
- if !bytes.Contains(data, []byte(expectedUnicodeString)) {
- t.Fatalf("Data does not contain expected unicode string: %s", expectedUnicodeString)
- }
- }
- func TestCharsetReaderWithWindows1252(t *testing.T) {
- file := "testdata/windows-1252.xml"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := CharsetReader("windows-1252", f)
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- expectedUnicodeString := "Euro €"
- if !bytes.Contains(data, []byte(expectedUnicodeString)) {
- t.Fatalf("Data does not contain expected unicode string: %s", expectedUnicodeString)
- }
- }
- func TestCharsetReaderWithInvalidProlog(t *testing.T) {
- file := "testdata/invalid-prolog.xml"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := CharsetReader("invalid", f)
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- expectedUnicodeString := "Café"
- if !bytes.Contains(data, []byte(expectedUnicodeString)) {
- t.Fatalf("Data does not contain expected unicode string: %s", expectedUnicodeString)
- }
- }
- func TestCharsetReaderWithUTF8DocumentWithIncorrectProlog(t *testing.T) {
- file := "testdata/utf8-incorrect-prolog.xml"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := CharsetReader("ISO-8859-1", f)
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- expectedUnicodeString := "Café"
- if !bytes.Contains(data, []byte(expectedUnicodeString)) {
- t.Fatalf("Data does not contain expected unicode string: %s", expectedUnicodeString)
- }
- }
- func TestCharsetReaderWithWindows1252DocumentWithIncorrectProlog(t *testing.T) {
- file := "testdata/windows-1252-incorrect-prolog.xml"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := CharsetReader("windows-1252", f)
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- expectedUnicodeString := "Euro €"
- if !bytes.Contains(data, []byte(expectedUnicodeString)) {
- t.Fatalf("Data does not contain expected unicode string: %s", expectedUnicodeString)
- }
- }
- func TestNewReaderWithUTF8Document(t *testing.T) {
- file := "testdata/utf8.html"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := NewCharsetReader(f, "text/html; charset=UTF-8")
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- expectedUnicodeString := "Café"
- if !bytes.Contains(data, []byte(expectedUnicodeString)) {
- t.Fatalf("Data does not contain expected unicode string: %s", expectedUnicodeString)
- }
- }
- func TestNewReaderWithUTF8DocumentAndNoContentEncoding(t *testing.T) {
- file := "testdata/utf8.html"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := NewCharsetReader(f, "text/html")
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- expectedUnicodeString := "Café"
- if !bytes.Contains(data, []byte(expectedUnicodeString)) {
- t.Fatalf("Data does not contain expected unicode string: %s", expectedUnicodeString)
- }
- }
- func TestNewReaderWithISO88591Document(t *testing.T) {
- file := "testdata/iso-8859-1.xml"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := NewCharsetReader(f, "text/html; charset=ISO-8859-1")
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- expectedUnicodeString := "Café"
- if !bytes.Contains(data, []byte(expectedUnicodeString)) {
- t.Fatalf("Data does not contain expected unicode string: %s", expectedUnicodeString)
- }
- }
- func TestNewReaderWithISO88591DocumentAndNoContentType(t *testing.T) {
- file := "testdata/iso-8859-1.xml"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := NewCharsetReader(f, "")
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- expectedUnicodeString := "Café"
- if !bytes.Contains(data, []byte(expectedUnicodeString)) {
- t.Fatalf("Data does not contain expected unicode string: %s", expectedUnicodeString)
- }
- }
- func TestNewReaderWithISO88591DocumentWithMetaAfter1024Bytes(t *testing.T) {
- file := "testdata/iso-8859-1-meta-after-1024.html"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := NewCharsetReader(f, "text/html")
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- expectedUnicodeString := "Café"
- if !bytes.Contains(data, []byte(expectedUnicodeString)) {
- t.Fatalf("Data does not contain expected unicode string: %s", expectedUnicodeString)
- }
- }
- func TestNewReaderWithUTF8DocumentWithMetaAfter1024Bytes(t *testing.T) {
- file := "testdata/utf8-meta-after-1024.html"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := NewCharsetReader(f, "text/html")
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- expectedUnicodeString := "Café"
- if !bytes.Contains(data, []byte(expectedUnicodeString)) {
- t.Fatalf("Data does not contain expected unicode string: %s", expectedUnicodeString)
- }
- }
- func TestCharsetReaderWithKOI8RLabel(t *testing.T) {
- expectedUnicodeString := "Привет мир"
- input, err := charmap.KOI8R.NewEncoder().Bytes([]byte(expectedUnicodeString))
- if err != nil {
- t.Fatalf("Unable to build KOI8-R input: %v", err)
- }
- reader, err := CharsetReader("koi8-r", bytes.NewReader(input))
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- if string(data) != expectedUnicodeString {
- t.Fatalf("Data does not match expected unicode string, got %q expected %q", string(data), expectedUnicodeString)
- }
- }
- func TestCharsetReaderWithUppercaseKOI8RLabel(t *testing.T) {
- expectedUnicodeString := "Привет мир"
- input, err := charmap.KOI8R.NewEncoder().Bytes([]byte(expectedUnicodeString))
- if err != nil {
- t.Fatalf("Unable to build KOI8-R input: %v", err)
- }
- reader, err := CharsetReader("KOI8-R", bytes.NewReader(input))
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- if string(data) != expectedUnicodeString {
- t.Fatalf("Data does not match expected unicode string, got %q expected %q", string(data), expectedUnicodeString)
- }
- }
- func TestCharsetReaderWithKOI8RFeedFixture(t *testing.T) {
- file := "testdata/koi8r.xml"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := CharsetReader("KOI8-R", f)
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- if !bytes.Contains(data, []byte("Пример RSS ленты")) {
- t.Fatalf("Data does not contain expected unicode string: %s", "Пример RSS ленты")
- }
- if !bytes.Contains(data, []byte("Привет мир! Ёжик, чай, Москва, Санкт-Петербург.")) {
- t.Fatalf("Data does not contain expected unicode string: %s", "Привет мир! Ёжик, чай, Москва, Санкт-Петербург.")
- }
- }
- func TestNewCharsetReaderWithKOI8RContentType(t *testing.T) {
- expectedUnicodeString := "Привет мир"
- input, err := charmap.KOI8R.NewEncoder().Bytes([]byte(expectedUnicodeString))
- if err != nil {
- t.Fatalf("Unable to build KOI8-R input: %v", err)
- }
- reader, err := NewCharsetReader(bytes.NewReader(input), "text/xml; charset=koi8-r")
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- if string(data) != expectedUnicodeString {
- t.Fatalf("Data does not match expected unicode string, got %q expected %q", string(data), expectedUnicodeString)
- }
- }
- func TestNewCharsetReaderWithKOI8RFeedFixtureAndContentType(t *testing.T) {
- file := "testdata/koi8r.xml"
- f, err := os.Open(file)
- if err != nil {
- t.Fatalf("Unable to open file: %v", err)
- }
- reader, err := NewCharsetReader(f, "application/rss+xml; charset=KOI8-R")
- if err != nil {
- t.Fatalf("Unable to create reader: %v", err)
- }
- data, err := io.ReadAll(reader)
- if err != nil {
- t.Fatalf("Unable to read data: %v", err)
- }
- if !utf8.Valid(data) {
- t.Fatalf("Data is not valid UTF-8")
- }
- if !bytes.Contains(data, []byte("Тестовая лента в кодировке KOI8-R")) {
- t.Fatalf("Data does not contain expected unicode string: %s", "Тестовая лента в кодировке KOI8-R")
- }
- if !bytes.Contains(data, []byte("Проверка специальных символов")) {
- t.Fatalf("Data does not contain expected unicode string: %s", "Проверка специальных символов")
- }
- }
|