Browse source

feat(rss): add workaround for RSS item title with HTML content

Frédéric Guillot 1 year ago
parent
commit
a3ce03cc9d

+ 2 - 2
internal/reader/rss/adapter.go

@@ -173,13 +173,13 @@ func findFeedAuthor(rssChannel *RSSChannel) string {
 }
 
 func findEntryTitle(rssItem *RSSItem) string {
-	title := rssItem.Title
+	title := sanitizer.StripTags(rssItem.Title.Inner)
 
 	if rssItem.DublinCoreTitle != "" {
 		title = rssItem.DublinCoreTitle
 	}
 
-	return html.UnescapeString(strings.TrimSpace(title))
+	return html.UnescapeString(html.UnescapeString(strings.TrimSpace(title)))
 }
 
 func findEntryURL(rssItem *RSSItem) string {

+ 24 - 0
internal/reader/rss/parser_test.go

@@ -1023,6 +1023,30 @@ func TestParseEntryTitleWithWhitespaces(t *testing.T) {
 	}
 }
 
+func TestParseEntryTitleWithInnerHTML(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<rss version="2.0">
+	<channel>
+		<title>Example</title>
+		<link>http://example.org</link>
+		<item>
+			<title>Test: <b>bold</b></title>
+			<link>http://www.example.org/entries/1</link>
+			<pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
+		</item>
+	</channel>
+	</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Entries[0].Title != "Test: bold" {
+		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
+	}
+}
+
 func TestParseEntryWithEnclosures(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 		<rss version="2.0">

+ 6 - 1
internal/reader/rss/rss.go

@@ -111,7 +111,7 @@ type RSSImage struct {
 
 type RSSItem struct {
 	// Title is the title of the item.
-	Title string `xml:"rss title"`
+	Title RSSTitle `xml:"rss title"`
 
 	// Link is the URL of the item.
 	Link string `xml:"rss link"`
@@ -169,6 +169,11 @@ type RSSItem struct {
 	googleplay.GooglePlayItemElement
 }
 
+type RSSTitle struct {
+	Data  string `xml:",chardata"`
+	Inner string `xml:",innerxml"`
+}
+
 type RSSAuthor struct {
 	XMLName xml.Name
 	Data    string `xml:",chardata"`