Browse Source

Handle RSS feed title with encoded Unicode entities

Frédéric Guillot 5 years ago
parent
commit
5b8eb4735c
2 changed files with 20 additions and 1 deletion
  1. 19 0
      reader/rss/parser_test.go
  2. 1 1
      reader/rss/rss.go

+ 19 - 0
reader/rss/parser_test.go

@@ -998,6 +998,25 @@ func TestParseFeedTitleWithHTMLEntity(t *testing.T) {
 	}
 }
 
+func TestParseFeedTitleWithUnicodeEntityAndCdata(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
+		<channel>
+			<link>https://example.org/</link>
+			<title><![CDATA[Jenny&#8217;s Newsletter]]></title>
+		</channel>
+		</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Title != `Jenny’s Newsletter` {
+		t.Errorf(`Incorrect title, got: %q`, feed.Title)
+	}
+}
+
 func TestParseItemTitleWithHTMLEntity(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 		<rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">

+ 1 - 1
reader/rss/rss.go

@@ -53,7 +53,7 @@ func (r *rssFeed) Transform(baseURL string) *model.Feed {
 		feed.FeedURL = feedURL
 	}
 
-	feed.Title = strings.TrimSpace(r.Title)
+	feed.Title = html.UnescapeString(strings.TrimSpace(r.Title))
 	if feed.Title == "" {
 		feed.Title = feed.SiteURL
 	}