Ver Fonte

Handle Atom feeds with HTML title

Frédéric Guillot há 8 anos atrás
pai
commit
9292d5d604
2 ficheiros alterados com 92 adições e 2 exclusões
  1. 14 2
      reader/atom/atom.go
  2. 78 0
      reader/atom/parser_test.go

+ 14 - 2
reader/atom/atom.go

@@ -14,6 +14,7 @@ import (
 	"github.com/miniflux/miniflux/logger"
 	"github.com/miniflux/miniflux/model"
 	"github.com/miniflux/miniflux/reader/date"
+	"github.com/miniflux/miniflux/reader/sanitizer"
 	"github.com/miniflux/miniflux/url"
 )
 
@@ -28,7 +29,7 @@ type atomFeed struct {
 
 type atomEntry struct {
 	ID         string         `xml:"id"`
-	Title      string         `xml:"title"`
+	Title      atomContent    `xml:"title"`
 	Updated    string         `xml:"updated"`
 	Links      []atomLink     `xml:"link"`
 	Summary    string         `xml:"summary"`
@@ -97,7 +98,7 @@ func (a *atomEntry) Transform() *model.Entry {
 	entry.Author = getAuthor(a.Author)
 	entry.Hash = getHash(a)
 	entry.Content = getContent(a)
-	entry.Title = strings.TrimSpace(a.Title)
+	entry.Title = getTitle(a)
 	entry.Enclosures = getEnclosures(a)
 	return entry
 }
@@ -160,6 +161,17 @@ func getContent(a *atomEntry) string {
 	return ""
 }
 
+// getTitle returns the entry title after sanitizer.StripTags and strings.TrimSpace.
+// Titles of type "xhtml" are read from the XML field; every other type uses Data.
+func getTitle(a *atomEntry) string {
+	raw := a.Title.Data
+	if a.Title.Type == "xhtml" {
+		raw = a.Title.XML
+	}
+
+	return strings.TrimSpace(sanitizer.StripTags(raw))
+}
+
 func getHash(a *atomEntry) string {
 	for _, value := range []string{a.ID, getURL(a.Links)} {
 		if value != "" {

+ 78 - 0
reader/atom/parser_test.go

@@ -206,6 +206,84 @@ func TestParseEntryTitleWithWhitespaces(t *testing.T) {
 	}
 }
 
+// TestParseEntryTitleWithHTMLAndCDATA expects character references in a CDATA html title to be decoded.
+func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<feed xmlns="http://www.w3.org/2005/Atom">
+	  <title>Example Feed</title>
+	  <link href="http://example.org/"/>
+
+	  <entry>
+		<title type="html"><![CDATA[Test &#8220;Test&#8221;]]></title>
+		<link href="http://example.org/2003/12/13/atom03"/>
+		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+		<updated>2003-12-13T18:30:02Z</updated>
+		<summary>Some text.</summary>
+	  </entry>
+
+	</feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err) // stop here: the check below dereferences feed
+	}
+	if feed.Entries[0].Title != "Test “Test”" {
+		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
+	}
+}
+
+// TestParseEntryTitleWithHTML expects escaped tags in a type="html" title to be removed from the result.
+func TestParseEntryTitleWithHTML(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<feed xmlns="http://www.w3.org/2005/Atom">
+	  <title>Example Feed</title>
+	  <link href="http://example.org/"/>
+
+	  <entry>
+		<title type="html">&lt;code&gt;Test&lt;/code&gt; Test</title>
+		<link href="http://example.org/2003/12/13/atom03"/>
+		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+		<updated>2003-12-13T18:30:02Z</updated>
+		<summary>Some text.</summary>
+	  </entry>
+
+	</feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err) // stop here: the check below dereferences feed
+	}
+	if feed.Entries[0].Title != "Test Test" {
+		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
+	}
+}
+
+// TestParseEntryTitleWithXHTML expects inline elements in a type="xhtml" title to be removed from the result.
+func TestParseEntryTitleWithXHTML(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<feed xmlns="http://www.w3.org/2005/Atom">
+	  <title>Example Feed</title>
+	  <link href="http://example.org/"/>
+
+	  <entry>
+		<title type="xhtml"><code>Test</code> Test</title>
+		<link href="http://example.org/2003/12/13/atom03"/>
+		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+		<updated>2003-12-13T18:30:02Z</updated>
+		<summary>Some text.</summary>
+	  </entry>
+
+	</feed>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err) // stop here: the check below dereferences feed
+	}
+	if feed.Entries[0].Title != "Test Test" {
+		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
+	}
+}
+
 func TestParseEntryWithAuthorName(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 	<feed xmlns="http://www.w3.org/2005/Atom">