Просмотр исходного кода

Ignore <media:title> in RSS 2.0 feeds

In the vast majority of cases, the default entry title is correct.

Ignoring <media:title> avoids overriding the default title when the two differ.
Frédéric Guillot 5 лет назад
Родитель
Commit
1d6b0491a7
2 измененных файлов с 70 добавлено и 2 удалено
  1. 45 0
      reader/rss/parser_test.go
  2. 25 2
      reader/rss/rss.go

+ 45 - 0
reader/rss/parser_test.go

@@ -136,6 +136,51 @@ func TestParseEntryWithoutTitle(t *testing.T) {
 	}
 }
 
+func TestParseEntryWithMediaTitle(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
+		<channel>
+			<link>https://example.org/</link>
+			<item>
+				<title>Entry Title</title>
+				<link>https://example.org/item</link>
+				<media:title>Media Title</media:title>
+			</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Entries[0].Title != "Entry Title" {
+		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
+	}
+}
+
+func TestParseEntryWithDCTitleOnly(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/" xmlns:dc="http://purl.org/dc/elements/1.1/">
+		<channel>
+			<link>https://example.org/</link>
+			<item>
+				<dc:title>Entry Title</dc:title>
+				<link>https://example.org/item</link>
+			</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Entries[0].Title != "Entry Title" {
+		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
+	}
+}
+
 func TestParseEntryWithoutLink(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 		<rss version="2.0">

+ 25 - 2
reader/rss/rss.go

@@ -122,6 +122,12 @@ type rssAuthor struct {
 	Inner   string `xml:",innerxml"`
 }
 
+type rssTitle struct {
+	XMLName xml.Name
+	Data    string `xml:",chardata"`
+	Inner   string `xml:",innerxml"`
+}
+
 type rssEnclosure struct {
 	URL    string `xml:"url,attr"`
 	Type   string `xml:"type,attr"`
@@ -138,7 +144,7 @@ func (enclosure *rssEnclosure) Size() int64 {
 
 type rssItem struct {
 	GUID           string           `xml:"guid"`
-	Title          string           `xml:"title"`
+	Title          []rssTitle       `xml:"title"`
 	Links          []rssLink        `xml:"link"`
 	Description    string           `xml:"description"`
 	PubDate        string           `xml:"pubDate"`
@@ -223,7 +229,24 @@ func (r *rssItem) entryHash() string {
 }
 
 func (r *rssItem) entryTitle() string {
-	return strings.TrimSpace(sanitizer.StripTags(r.Title))
+	var title string
+
+	for _, rssTitle := range r.Title {
+		switch rssTitle.XMLName.Space {
+		case "http://search.yahoo.com/mrss/":
+			// Ignore title in media namespace
+		case "http://purl.org/dc/elements/1.1/":
+			title = rssTitle.Data
+		default:
+			title = rssTitle.Data
+		}
+
+		if title != "" {
+			break
+		}
+	}
+
+	return strings.TrimSpace(sanitizer.StripTags(title))
 }
 
 func (r *rssItem) entryContent() string {