Browse Source

Handle RDF feeds with HTML-encoded entry titles

Example: http://rss.slashdot.org/Slashdot/slashdotMain
Frédéric Guillot 5 years ago
parent
commit
96f3e888cf
2 changed files with 60 additions and 1 deletion
  1. reader/rdf/parser_test.go (+58 -0)
  2. reader/rdf/rdf.go (+2 -1)

+ 58 - 0
reader/rdf/parser_test.go

@@ -377,6 +377,31 @@ func TestParseItemWithoutDate(t *testing.T) {
 	}
 }
 
+func TestParseItemWithEncodedHTMLTitle(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+	  <channel>
+			<title>Example</title>
+			<link>http://example.org</link>
+	  </channel>
+
+	  <item>
+			<title>AT&amp;amp;T</title>
+			<description>Test</description>
+			<link>http://example.org/test.html</link>
+	  </item>
+	</rdf:RDF>`
+
+	feed, err := Parse("http://example.org", bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Entries[0].Title != `AT&T` {
+		t.Errorf("Incorrect entry title, got: %v", feed.Entries[0].Title)
+	}
+}
+
 func TestParseInvalidXml(t *testing.T) {
 	data := `garbage`
 	_, err := Parse("http://example.org", bytes.NewBufferString(data))
@@ -519,3 +544,36 @@ func TestParseRDFWithContentEncoded(t *testing.T) {
 		t.Errorf(`Unexpected entry URL, got %q instead of %q`, result, expected)
 	}
 }
+
+func TestParseRDFWithEncodedHTMLDescription(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<rdf:RDF
+		xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+		xmlns="http://purl.org/rss/1.0/"
+		xmlns:content="http://purl.org/rss/1.0/modules/content/">
+		<channel>
+			<title>Example Feed</title>
+			<link>http://example.org/</link>
+		</channel>
+		<item>
+			<title>Item Title</title>
+			<link>http://example.org/</link>
+			<description>AT&amp;amp;T &lt;img src="https://example.org/img.png"&gt;&lt;/a&gt;</description>
+		</item>
+	</rdf:RDF>`
+
+	feed, err := Parse("http://example.org/", bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 {
+		t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
+	}
+
+	expected := `AT&amp;T <img src="https://example.org/img.png"></a>`
+	result := feed.Entries[0].Content
+	if result != expected {
+		t.Errorf(`Unexpected entry URL, got %v instead of %v`, result, expected)
+	}
+}

+ 2 - 1
reader/rdf/rdf.go

@@ -6,6 +6,7 @@ package rdf // import "miniflux.app/reader/rdf"
 
 import (
 	"encoding/xml"
+	"html"
 	"strings"
 	"time"
 
@@ -75,7 +76,7 @@ func (r *rdfItem) Transform() *model.Entry {
 }
 
 func (r *rdfItem) entryTitle() string {
-	return strings.TrimSpace(r.Title)
+	return html.UnescapeString(strings.TrimSpace(r.Title))
 }
 
 func (r *rdfItem) entryContent() string {