ソースを参照

Handle entry title with double encoded entities in Atom feeds

Frédéric Guillot 5 年前
コミット
04f9c456d5
2 ファイル変更、55 行追加、2 行削除
  1. +2 -1
      reader/atom/atom_10.go
  2. +53 -1
      reader/atom/atom_10_test.go

+ 2 - 1
reader/atom/atom_10.go

@@ -6,6 +6,7 @@ package atom // import "miniflux.app/reader/atom"
 
 import (
 	"encoding/xml"
+	"html"
 	"strconv"
 	"strings"
 	"time"
@@ -235,5 +236,5 @@ func (a *atom10Text) String() string {
 		content = a.Data
 	}
 
-	return strings.TrimSpace(content)
+	return html.UnescapeString(strings.TrimSpace(content))
 }

+ 53 - 1
reader/atom/atom_10_test.go

@@ -265,7 +265,7 @@ func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	if feed.Entries[0].Title != "Test &#8220;Test&#8221;" {
+	if feed.Entries[0].Title != "Test “Test”" {
 		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
 	}
 }
@@ -322,6 +322,58 @@ func TestParseEntryTitleWithXHTML(t *testing.T) {
 	}
 }
 
+func TestParseEntryTitleWithNumericCharacterReference(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<feed xmlns="http://www.w3.org/2005/Atom">
+	  <title>Example Feed</title>
+	  <link href="http://example.org/"/>
+
+	  <entry>
+		<title>&#931; &#xDF;</title>
+		<link href="http://example.org/2003/12/13/atom03"/>
+		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+		<updated>2003-12-13T18:30:02Z</updated>
+		<summary>Some text.</summary>
+	  </entry>
+
+	</feed>`
+
+	feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Entries[0].Title != "Σ ß" {
+		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
+	}
+}
+
+func TestParseEntryTitleWithDoubleEncodedEntities(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<feed xmlns="http://www.w3.org/2005/Atom">
+	  <title>Example Feed</title>
+	  <link href="http://example.org/"/>
+
+	  <entry>
+		<title>&amp;#39;AT&amp;amp;T&amp;#39;</title>
+		<link href="http://example.org/2003/12/13/atom03"/>
+		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+		<updated>2003-12-13T18:30:02Z</updated>
+		<summary>Some text.</summary>
+	  </entry>
+
+	</feed>`
+
+	feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Entries[0].Title != `'AT&T'` {
+		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
+	}
+}
+
 func TestParseEntrySummaryWithXHTML(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 	<feed xmlns="http://www.w3.org/2005/Atom">