Browse Source

Add support of media elements for RSS 2 feeds

Frédéric Guillot 6 years ago
parent
commit
f90e9dfab0
2 changed files with 221 additions and 19 deletions
  1. 119 0
      reader/rss/parser_test.go
  2. 102 19
      reader/rss/rss.go

+ 119 - 0
reader/rss/parser_test.go

@@ -652,3 +652,122 @@ func TestParseWithInvalidCharacterEntity(t *testing.T) {
 		t.Errorf(`Incorrect url, got: %q`, feed.SiteURL)
 	}
 }
+
+// TestParseEntryWithMediaGroup checks that the enclosures of an RSS item
+// combine its <enclosure>, <media:thumbnail> and <media:group> contents,
+// that a URL listed twice (file3.torrent) is reported only once, and that
+// each enclosure carries the expected MIME type and size.
+func TestParseEntryWithMediaGroup(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
+		<channel>
+		<title>My Example Feed</title>
+		<link>http://example.org</link>
+		<item>
+			<title>Example Item</title>
+			<link>http://www.example.org/entries/1</link>
+			<enclosure type="application/x-bittorrent" url="https://example.org/file3.torrent" length="670053113">
+			</enclosure>
+			<media:group>
+				<media:content type="application/x-bittorrent" url="https://example.org/file1.torrent"></media:content>
+				<media:content type="application/x-bittorrent" url="https://example.org/file2.torrent" isDefault="true"></media:content>
+				<media:content type="application/x-bittorrent" url="https://example.org/file3.torrent"></media:content>
+				<media:content type="application/x-bittorrent" url="https://example.org/file4.torrent"></media:content>
+				<media:content type="application/x-bittorrent" url="https://example.org/file5.torrent" fileSize="42"></media:content>
+				<media:rating>nonadult</media:rating>
+			</media:group>
+			<media:thumbnail url="https://example.org/image.jpg" height="122" width="223"></media:thumbnail>
+		</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Fatalf rather than Errorf: everything below indexes Entries[0], which
+	// would panic on an empty feed instead of reporting a test failure.
+	if len(feed.Entries) != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	enclosures := feed.Entries[0].Enclosures
+	if len(enclosures) != 6 {
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(enclosures))
+	}
+
+	// Expected order: thumbnail, then <enclosure>, then media contents.
+	expectedResults := []struct {
+		url      string
+		mimeType string
+		size     int64
+	}{
+		{"https://example.org/image.jpg", "image/*", 0},
+		{"https://example.org/file3.torrent", "application/x-bittorrent", 670053113},
+		{"https://example.org/file1.torrent", "application/x-bittorrent", 0},
+		{"https://example.org/file2.torrent", "application/x-bittorrent", 0},
+		{"https://example.org/file4.torrent", "application/x-bittorrent", 0},
+		{"https://example.org/file5.torrent", "application/x-bittorrent", 42},
+	}
+
+	for index, enclosure := range enclosures {
+		expected := expectedResults[index]
+
+		if expected.url != enclosure.URL {
+			t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expected.url)
+		}
+
+		if expected.mimeType != enclosure.MimeType {
+			t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expected.mimeType)
+		}
+
+		if expected.size != enclosure.Size {
+			t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expected.size)
+		}
+	}
+}
+
+// TestParseEntryWithMediaContent checks that <media:thumbnail> and
+// <media:content> elements placed directly under an item become enclosures,
+// and that medium="image" without a type attribute maps to "image/*".
+func TestParseEntryWithMediaContent(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
+		<channel>
+		<title>My Example Feed</title>
+		<link>http://example.org</link>
+		<item>
+			<title>Example Item</title>
+			<link>http://www.example.org/entries/1</link>
+			<media:thumbnail url="https://example.org/thumbnail.jpg" />
+			<media:content url="https://example.org/media1.jpg" medium="image">
+				<media:title type="html">Some Title for Media 1</media:title>
+			</media:content>
+			<media:content url="https://example.org/media2.jpg" medium="image" />
+		</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse(bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Fatalf rather than Errorf: everything below indexes Entries[0], which
+	// would panic on an empty feed instead of reporting a test failure.
+	if len(feed.Entries) != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	enclosures := feed.Entries[0].Enclosures
+	if len(enclosures) != 3 {
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(enclosures))
+	}
+
+	// Expected order: thumbnail first, then media contents in document order.
+	expectedResults := []struct {
+		url      string
+		mimeType string
+		size     int64
+	}{
+		{"https://example.org/thumbnail.jpg", "image/*", 0},
+		{"https://example.org/media1.jpg", "image/*", 0},
+		{"https://example.org/media2.jpg", "image/*", 0},
+	}
+
+	for index, enclosure := range enclosures {
+		expected := expectedResults[index]
+
+		if expected.url != enclosure.URL {
+			t.Errorf(`Unexpected enclosure URL, got %q instead of %q`, enclosure.URL, expected.url)
+		}
+
+		if expected.mimeType != enclosure.MimeType {
+			t.Errorf(`Unexpected enclosure type, got %q instead of %q`, enclosure.MimeType, expected.mimeType)
+		}
+
+		if expected.size != enclosure.Size {
+			t.Errorf(`Unexpected enclosure size, got %d instead of %d`, enclosure.Size, expected.size)
+		}
+	}
+}

+ 102 - 19
reader/rss/rss.go

@@ -56,20 +56,71 @@ type rssEnclosure struct {
 	Length string `xml:"length,attr"`
 }
 
+// Size returns the "length" attribute parsed as a base-10 integer.
+// It returns 0 when the attribute is empty, is not a valid integer,
+// or does not fit in an int64.
+func (enclosure *rssEnclosure) Size() int64 {
+	// bitSize is 64 to match the return type; 0 would mean the platform
+	// "int" and clamp large values on 32-bit builds.
+	size, err := strconv.ParseInt(enclosure.Length, 10, 64)
+	if err != nil {
+		return 0
+	}
+	return size
+}
+
 type rssItem struct {
-	GUID              string           `xml:"guid"`
-	Title             string           `xml:"title"`
-	Links             []rssLink        `xml:"link"`
-	OriginalLink      string           `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
-	CommentLinks      []rssCommentLink `xml:"comments"`
-	Description       string           `xml:"description"`
-	EncodedContent    string           `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
-	PubDate           string           `xml:"pubDate"`
-	Date              string           `xml:"http://purl.org/dc/elements/1.1/ date"`
-	Authors           []rssAuthor      `xml:"author"`
-	Creator           string           `xml:"http://purl.org/dc/elements/1.1/ creator"`
-	EnclosureLinks    []rssEnclosure   `xml:"enclosure"`
-	OrigEnclosureLink string           `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
+	GUID              string               `xml:"guid"`
+	Title             string               `xml:"title"`
+	Links             []rssLink            `xml:"link"`
+	OriginalLink      string               `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
+	CommentLinks      []rssCommentLink     `xml:"comments"`
+	Description       string               `xml:"description"`
+	EncodedContent    string               `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
+	PubDate           string               `xml:"pubDate"`
+	Date              string               `xml:"http://purl.org/dc/elements/1.1/ date"`
+	Authors           []rssAuthor          `xml:"author"`
+	Creator           string               `xml:"http://purl.org/dc/elements/1.1/ creator"`
+	EnclosureLinks    []rssEnclosure       `xml:"enclosure"`
+	OrigEnclosureLink string               `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
+	MediaGroup        []rssMediaGroup      `xml:"http://search.yahoo.com/mrss/ group"`     // <media:group> elements; Enclosures() appends their contents to MediaContents
+	MediaContents     []rssMediaContent    `xml:"http://search.yahoo.com/mrss/ content"`   // <media:content> elements found directly under the item
+	MediaThumbnails   []rssMediaThumbnails `xml:"http://search.yahoo.com/mrss/ thumbnail"` // <media:thumbnail> elements; exposed by Enclosures() as "image/*"
+}
+
+type rssMediaGroup struct { // rssMediaGroup maps one <media:group> element of an item.
+	MediaList []rssMediaContent `xml:"content"` // "content" child elements of the group
+}
+
+type rssMediaContent struct { // rssMediaContent holds the attributes read from one <media:content> element.
+	URL      string `xml:"url,attr"`      // "url" attribute
+	Type     string `xml:"type,attr"`     // "type" attribute; returned as-is by MimeType() when set
+	FileSize string `xml:"fileSize,attr"` // "fileSize" attribute, kept as text and parsed by Size()
+	Medium   string `xml:"medium,attr"`   // "medium" attribute; MimeType() falls back to it when Type is empty
+}
+
+// MimeType returns the MIME type to report for this media content.
+// An explicit "type" attribute always wins. Otherwise the "medium"
+// attribute is mapped to a wildcard type ("image/*", "video/*" or
+// "audio/*"), and anything else yields "application/octet-stream".
+func (mediaContent *rssMediaContent) MimeType() string {
+	if mediaContent.Type != "" {
+		return mediaContent.Type
+	}
+
+	switch mediaContent.Medium {
+	case "image", "video", "audio":
+		return mediaContent.Medium + "/*"
+	default:
+		return "application/octet-stream"
+	}
+}
+
+// Size returns the "fileSize" attribute parsed as a base-10 integer.
+// It returns 0 when the attribute is empty, is not a valid integer,
+// or does not fit in an int64.
+func (mediaContent *rssMediaContent) Size() int64 {
+	// bitSize is 64 to match the return type; 0 would mean the platform
+	// "int" and clamp large values on 32-bit builds.
+	size, err := strconv.ParseInt(mediaContent.FileSize, 10, 64)
+	if err != nil {
+		return 0
+	}
+	return size
+}
+
+type rssMediaThumbnails struct { // rssMediaThumbnails maps one <media:thumbnail> element (despite the plural name).
+	URL string `xml:"url,attr"` // "url" attribute of the thumbnail
 }
 
 func (r *rssFeed) SiteURL() string {
@@ -200,9 +251,20 @@ func (r *rssItem) URL() string {
 
 func (r *rssItem) Enclosures() model.EnclosureList {
 	enclosures := make(model.EnclosureList, 0)
+	duplicates := make(map[string]bool, 0)
+
+	for _, mediaThumbnail := range r.MediaThumbnails {
+		if _, found := duplicates[mediaThumbnail.URL]; !found {
+			duplicates[mediaThumbnail.URL] = true
+			enclosures = append(enclosures, &model.Enclosure{
+				URL:      mediaThumbnail.URL,
+				MimeType: "image/*",
+				Size:     0,
+			})
+		}
+	}
 
 	for _, enclosure := range r.EnclosureLinks {
-		length, _ := strconv.ParseInt(enclosure.Length, 10, 0)
 		enclosureURL := enclosure.URL
 
 		if r.OrigEnclosureLink != "" {
@@ -212,11 +274,32 @@ func (r *rssItem) Enclosures() model.EnclosureList {
 			}
 		}
 
-		enclosures = append(enclosures, &model.Enclosure{
-			URL:      enclosureURL,
-			MimeType: enclosure.Type,
-			Size:     length,
-		})
+		if _, found := duplicates[enclosureURL]; !found {
+			duplicates[enclosureURL] = true
+
+			enclosures = append(enclosures, &model.Enclosure{
+				URL:      enclosureURL,
+				MimeType: enclosure.Type,
+				Size:     enclosure.Size(),
+			})
+		}
+	}
+
+	for _, mediaContentItem := range r.MediaGroup {
+		for _, mediaContent := range mediaContentItem.MediaList {
+			r.MediaContents = append(r.MediaContents, mediaContent)
+		}
+	}
+
+	for _, mediaContent := range r.MediaContents {
+		if _, found := duplicates[mediaContent.URL]; !found {
+			duplicates[mediaContent.URL] = true
+			enclosures = append(enclosures, &model.Enclosure{
+				URL:      mediaContent.URL,
+				MimeType: mediaContent.MimeType(),
+				Size:     mediaContent.Size(),
+			})
+		}
 	}
 
 	return enclosures