浏览代码

Ensure enclosure URLs are always absolute

Frédéric Guillot 2 年之前
父节点
当前提交
08640b27d5

+ 75 - 34
internal/reader/atom/atom_10_adapter.go

@@ -158,51 +158,92 @@ func (a *Atom10Adapter) populateEntries(siteURL string) model.Entries {
 		uniqueEnclosuresMap := make(map[string]bool)
 
 		for _, mediaThumbnail := range atomEntry.AllMediaThumbnails() {
-			if _, found := uniqueEnclosuresMap[mediaThumbnail.URL]; !found {
-				uniqueEnclosuresMap[mediaThumbnail.URL] = true
-				entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
-					URL:      mediaThumbnail.URL,
-					MimeType: mediaThumbnail.MimeType(),
-					Size:     mediaThumbnail.Size(),
-				})
-			}
-		}
-
-		for _, link := range atomEntry.Links {
-			if !strings.EqualFold(link.Rel, "enclosure") || link.Href == "" {
+			mediaURL := strings.TrimSpace(mediaThumbnail.URL)
+			if mediaURL == "" {
 				continue
 			}
+			if _, found := uniqueEnclosuresMap[mediaURL]; !found {
+				if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
+					slog.Debug("Unable to build absolute URL for media thumbnail",
+						slog.String("url", mediaThumbnail.URL),
+						slog.String("site_url", siteURL),
+						slog.Any("error", err),
+					)
+				} else {
+					uniqueEnclosuresMap[mediaAbsoluteURL] = true
+					entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
+						URL:      mediaAbsoluteURL,
+						MimeType: mediaThumbnail.MimeType(),
+						Size:     mediaThumbnail.Size(),
+					})
+				}
+			}
+		}
 
-			if _, found := uniqueEnclosuresMap[link.Href]; !found {
-				uniqueEnclosuresMap[link.Href] = true
-				length, _ := strconv.ParseInt(link.Length, 10, 0)
-				entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
-					URL:      link.Href,
-					MimeType: link.Type,
-					Size:     length,
-				})
+		for _, link := range atomEntry.Links.findAllLinksWithRelation("enclosure") {
+			absoluteEnclosureURL, err := urllib.AbsoluteURL(siteURL, link.Href)
+			if err != nil {
+				slog.Debug("Unable to resolve absolute URL for enclosure",
+					slog.String("enclosure_url", link.Href),
+					slog.String("entry_url", entry.URL),
+					slog.Any("error", err),
+				)
+			} else {
+				if _, found := uniqueEnclosuresMap[absoluteEnclosureURL]; !found {
+					uniqueEnclosuresMap[absoluteEnclosureURL] = true
+					length, _ := strconv.ParseInt(link.Length, 10, 0)
+					entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
+						URL:      absoluteEnclosureURL,
+						MimeType: link.Type,
+						Size:     length,
+					})
+				}
 			}
 		}
 
 		for _, mediaContent := range atomEntry.AllMediaContents() {
-			if _, found := uniqueEnclosuresMap[mediaContent.URL]; !found {
-				uniqueEnclosuresMap[mediaContent.URL] = true
-				entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
-					URL:      mediaContent.URL,
-					MimeType: mediaContent.MimeType(),
-					Size:     mediaContent.Size(),
-				})
+			mediaURL := strings.TrimSpace(mediaContent.URL)
+			if mediaURL == "" {
+				continue
+			}
+			if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
+				slog.Debug("Unable to build absolute URL for media content",
+					slog.String("url", mediaContent.URL),
+					slog.String("site_url", siteURL),
+					slog.Any("error", err),
+				)
+			} else {
+				if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; !found {
+					uniqueEnclosuresMap[mediaAbsoluteURL] = true
+					entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
+						URL:      mediaAbsoluteURL,
+						MimeType: mediaContent.MimeType(),
+						Size:     mediaContent.Size(),
+					})
+				}
 			}
 		}
 
 		for _, mediaPeerLink := range atomEntry.AllMediaPeerLinks() {
-			if _, found := uniqueEnclosuresMap[mediaPeerLink.URL]; !found {
-				uniqueEnclosuresMap[mediaPeerLink.URL] = true
-				entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
-					URL:      mediaPeerLink.URL,
-					MimeType: mediaPeerLink.MimeType(),
-					Size:     mediaPeerLink.Size(),
-				})
+			mediaURL := strings.TrimSpace(mediaPeerLink.URL)
+			if mediaURL == "" {
+				continue
+			}
+			if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
+				slog.Debug("Unable to build absolute URL for media peer link",
+					slog.String("url", mediaPeerLink.URL),
+					slog.String("site_url", siteURL),
+					slog.Any("error", err),
+				)
+			} else {
+				if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; !found {
+					uniqueEnclosuresMap[mediaAbsoluteURL] = true
+					entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
+						URL:      mediaAbsoluteURL,
+						MimeType: mediaPeerLink.MimeType(),
+						Size:     mediaPeerLink.Size(),
+					})
+				}
 			}
 		}
 

+ 119 - 35
internal/reader/atom/atom_10_test.go

@@ -1105,7 +1105,7 @@ func TestParseEntryWithEnclosures(t *testing.T) {
 	}
 
 	if len(feed.Entries) != 1 {
-		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
 	}
 
 	if feed.Entries[0].URL != "http://www.example.org/entries/1" {
@@ -1140,6 +1140,89 @@ func TestParseEntryWithEnclosures(t *testing.T) {
 	}
 }
 
+// TestParseEntryWithRelativeEnclosureURL parses an Atom entry whose enclosure
+// link has a root-relative href padded with whitespace, and expects a single
+// enclosure with the URL "https://example.org/myaudiofile.mp3".
+func TestParseEntryWithRelativeEnclosureURL(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<feed xmlns="http://www.w3.org/2005/Atom">
+		<id>https://www.example.org/myfeed</id>
+		<title>My Podcast Feed</title>
+		<link href="https://example.org" />
+		<link rel="self" href="https://example.org/myfeed" />
+		<entry>
+			<id>https://www.example.org/entries/1</id>
+			<title>Atom 1.0</title>
+			<updated>2005-07-15T12:00:00Z</updated>
+			<link href="https://www.example.org/entries/1" />
+			<link rel="enclosure"
+					type="audio/mpeg"
+					title="MP3"
+					href="  /myaudiofile.mp3  "
+					length="1234" />
+			</content>
+		</entry>
+  	</feed>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Abort on an unexpected count: the following checks index into the slice.
+	entries := feed.Entries
+	if count := len(entries); count != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", count)
+	}
+
+	enclosures := entries[0].Enclosures
+	if count := len(enclosures); count != 1 {
+		t.Fatalf("Incorrect number of enclosures, got: %d", count)
+	}
+
+	if got := enclosures[0].URL; got != "https://example.org/myaudiofile.mp3" {
+		t.Errorf("Incorrect enclosure URL, got: %q", got)
+	}
+}
+
+// TestParseEntryWithDuplicateEnclosureURL parses an Atom entry that lists the
+// same enclosure href twice, the second time padded with whitespace, and
+// expects the two links to collapse into a single enclosure.
+func TestParseEntryWithDuplicateEnclosureURL(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<feed xmlns="http://www.w3.org/2005/Atom">
+		<id>http://www.example.org/myfeed</id>
+		<title>My Podcast Feed</title>
+		<link href="http://example.org" />
+		<link rel="self" href="http://example.org/myfeed" />
+		<entry>
+			<id>http://www.example.org/entries/1</id>
+			<title>Atom 1.0</title>
+			<updated>2005-07-15T12:00:00Z</updated>
+			<link href="http://www.example.org/entries/1" />
+			<link rel="enclosure"
+					type="audio/mpeg"
+					title="MP3"
+					href="http://www.example.org/myaudiofile.mp3"
+					length="1234" />
+			<link rel="enclosure"
+					type="audio/mpeg"
+					title="MP3"
+					href="   http://www.example.org/myaudiofile.mp3  "
+					length="1234" />
+			</content>
+		</entry>
+  	</feed>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Abort on an unexpected count: the following checks index into the slice.
+	entries := feed.Entries
+	if count := len(entries); count != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", count)
+	}
+
+	enclosures := entries[0].Enclosures
+	if count := len(enclosures); count != 1 {
+		t.Fatalf("Incorrect number of enclosures, got: %d", count)
+	}
+
+	if got := enclosures[0].URL; got != "http://www.example.org/myaudiofile.mp3" {
+		t.Errorf("Incorrect enclosure URL, got: %q", got)
+	}
+}
+
 func TestParseEntryWithoutEnclosureURL(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 	<feed xmlns="http://www.w3.org/2005/Atom">
@@ -1334,20 +1417,25 @@ func TestParseWithInvalidCharacterEntity(t *testing.T) {
 func TestParseMediaGroup(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 	<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
-		<id>http://www.example.org/myfeed</id>
+		<id>https://www.example.org/myfeed</id>
 		<title>My Video Feed</title>
 		<updated>2005-07-15T12:00:00Z</updated>
-		<link href="http://example.org" />
-		<link rel="self" href="http://example.org/myfeed" />
+		<link href="https://example.org" />
+		<link rel="self" href="https://example.org/myfeed" />
 		<entry>
-			<id>http://www.example.org/entries/1</id>
+			<id>https://www.example.org/entries/1</id>
 			<title>Some Video</title>
 			<updated>2005-07-15T12:00:00Z</updated>
-			<link href="http://www.example.org/entries/1" />
+			<link href="https://www.example.org/entries/1" />
 			<media:group>
 				<media:title>Another title</media:title>
 				<media:content url="https://www.youtube.com/v/abcd" type="application/x-shockwave-flash" width="640" height="390"/>
-				<media:thumbnail url="https://example.org/thumbnail.jpg" width="480" height="360"/>
+				<media:content url="   /v/efg  " type="application/x-shockwave-flash" width="640" height="390"/>
+				<media:content url="     " type="application/x-shockwave-flash" width="640" height="390"/>
+				<media:thumbnail url="https://www.example.org/duplicate-thumbnail.jpg" width="480" height="360"/>
+				<media:thumbnail url="https://www.example.org/duplicate-thumbnail.jpg" width="480" height="360"/>
+				<media:thumbnail url=" /thumbnail2.jpg   " width="480" height="360"/>
+				<media:thumbnail url="    " width="480" height="360"/>
 				<media:description>Some description
 A website: http://example.org/</media:description>
 			</media:group>
@@ -1360,18 +1448,10 @@ A website: http://example.org/</media:description>
 	}
 
 	if len(feed.Entries) != 1 {
-		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if feed.Entries[0].URL != "http://www.example.org/entries/1" {
-		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
-	}
-
-	if feed.Entries[0].Content != `Some description<br>A website: <a href="http://example.org/">http://example.org/</a>` {
-		t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
 	}
 
-	if len(feed.Entries[0].Enclosures) != 2 {
+	if len(feed.Entries[0].Enclosures) != 4 {
 		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
 	}
 
@@ -1380,8 +1460,10 @@ A website: http://example.org/</media:description>
 		mimeType string
 		size     int64
 	}{
-		{"https://example.org/thumbnail.jpg", "image/*", 0},
+		{"https://www.example.org/duplicate-thumbnail.jpg", "image/*", 0},
+		{"https://example.org/thumbnail2.jpg", "image/*", 0},
 		{"https://www.youtube.com/v/abcd", "application/x-shockwave-flash", 0},
+		{"https://example.org/v/efg", "application/x-shockwave-flash", 0},
 	}
 
 	for index, enclosure := range feed.Entries[0].Enclosures {
@@ -1402,19 +1484,26 @@ A website: http://example.org/</media:description>
 func TestParseMediaElements(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 	<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
-		<id>http://www.example.org/myfeed</id>
+		<id>https://www.example.org/myfeed</id>
 		<title>My Video Feed</title>
 		<updated>2005-07-15T12:00:00Z</updated>
-		<link href="http://example.org" />
-		<link rel="self" href="http://example.org/myfeed" />
+		<link href="https://example.org" />
+		<link rel="self" href="https://example.org/myfeed" />
 		<entry>
-			<id>http://www.example.org/entries/1</id>
+			<id>https://www.example.org/entries/1</id>
 			<title>Some Video</title>
 			<updated>2005-07-15T12:00:00Z</updated>
-			<link href="http://www.example.org/entries/1" />
+			<link href="https://www.example.org/entries/1" />
 			<media:title>Another title</media:title>
 			<media:content url="https://www.youtube.com/v/abcd" type="application/x-shockwave-flash" width="640" height="390"/>
-			<media:thumbnail url="https://example.org/thumbnail.jpg" width="480" height="360"/>
+			<media:content url="   /relative/media.mp4   " type="application/x-shockwave-flash" width="640" height="390"/>
+			<media:content url="      " type="application/x-shockwave-flash" width="640" height="390"/>
+			<media:thumbnail url="https://example.org/duplicated-thumbnail.jpg" width="480" height="360"/>
+			<media:thumbnail url="  https://example.org/duplicated-thumbnail.jpg  " width="480" height="360"/>
+			<media:thumbnail url="    " width="480" height="360"/>
+			<media:peerLink type="application/x-bittorrent" href="   http://www.example.org/sampleFile.torrent   " />
+			<media:peerLink type="application/x-bittorrent" href=" /sampleFile2.torrent" />
+			<media:peerLink type="application/x-bittorrent" href=" " />
 			<media:description>Some description
 A website: http://example.org/</media:description>
 		</entry>
@@ -1426,18 +1515,10 @@ A website: http://example.org/</media:description>
 	}
 
 	if len(feed.Entries) != 1 {
-		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if feed.Entries[0].URL != "http://www.example.org/entries/1" {
-		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
-	}
-
-	if feed.Entries[0].Content != `Some description<br>A website: <a href="http://example.org/">http://example.org/</a>` {
-		t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
 	}
 
-	if len(feed.Entries[0].Enclosures) != 2 {
+	if len(feed.Entries[0].Enclosures) != 5 {
 		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
 	}
 
@@ -1446,8 +1527,11 @@ A website: http://example.org/</media:description>
 		mimeType string
 		size     int64
 	}{
-		{"https://example.org/thumbnail.jpg", "image/*", 0},
+		{"https://example.org/duplicated-thumbnail.jpg", "image/*", 0},
 		{"https://www.youtube.com/v/abcd", "application/x-shockwave-flash", 0},
+		{"https://example.org/relative/media.mp4", "application/x-shockwave-flash", 0},
+		{"http://www.example.org/sampleFile.torrent", "application/x-bittorrent", 0},
+		{"https://example.org/sampleFile2.torrent", "application/x-bittorrent", 0},
 	}
 
 	for index, enclosure := range feed.Entries[0].Enclosures {

+ 15 - 0
internal/reader/atom/atom_common.go

@@ -96,6 +96,21 @@ func (a AtomLinks) firstLinkWithRelationAndType(relation string, contentTypes ..
 	return ""
 }
 
+// findAllLinksWithRelation returns every link whose Rel matches relation
+// (compared case-insensitively) and whose Href is non-empty once surrounding
+// whitespace has been removed. The result is nil when nothing matches.
+//
+// Note that the Href of each link with a matching Rel is trimmed in place on
+// the link itself, including links that are then dropped for being empty.
+func (a AtomLinks) findAllLinksWithRelation(relation string) []*AtomLink {
+	var matches []*AtomLink
+
+	for _, candidate := range a {
+		if !strings.EqualFold(candidate.Rel, relation) {
+			continue
+		}
+
+		// Store the trimmed value back so callers read a clean Href.
+		candidate.Href = strings.TrimSpace(candidate.Href)
+		if candidate.Href == "" {
+			continue
+		}
+
+		matches = append(matches, candidate)
+	}
+
+	return matches
+}
+
 // The "atom:category" element conveys information about a category
 // associated with an entry or feed.  This specification assigns no
 // meaning to the content (if any) of this element.

+ 36 - 0
internal/reader/json/parser_test.go

@@ -848,6 +848,42 @@ func TestParseFeedIcon(t *testing.T) {
 	}
 }
 
+// TestParseFeedWithRelativeAttachmentURL parses a JSON feed whose only
+// attachment has a root-relative URL padded with whitespace, and expects a
+// single enclosure with the URL "https://example.org/attachment.mp3".
+func TestParseFeedWithRelativeAttachmentURL(t *testing.T) {
+	data := `{
+		"version": "https://jsonfeed.org/version/1",
+		"title": "My Example Feed",
+		"home_page_url": "https://example.org/",
+		"feed_url": "https://example.org/feed.json",
+		"items": [
+			{
+				"id": "2",
+				"content_text": "This is a second item.",
+				"url": "https://example.org/second-item",
+				"attachments": [
+					{
+						"url": "   /attachment.mp3  ",
+						"mime_type": "audio/mpeg",
+						"size_in_bytes": 123456
+					}
+				]
+			}
+		]
+	}`
+
+	feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Check the entry count before indexing feed.Entries[0]; without this a
+	// parsing regression would surface as an index-out-of-range panic rather
+	// than a readable test failure.
+	if len(feed.Entries) != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if len(feed.Entries[0].Enclosures) != 1 {
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	if feed.Entries[0].Enclosures[0].URL != "https://example.org/attachment.mp3" {
+		t.Errorf("Incorrect enclosure URL, got: %q", feed.Entries[0].Enclosures[0].URL)
+	}
+}
+
 func TestParseInvalidJSON(t *testing.T) {
 	data := `garbage`
 	_, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))

+ 67 - 24
internal/reader/rss/adapter.go

@@ -72,7 +72,7 @@ func (r *RSSAdapter) BuildFeed(baseURL string) *model.Feed {
 		entry := model.NewEntry()
 		entry.Date = findEntryDate(&item)
 		entry.Content = findEntryContent(&item)
-		entry.Enclosures = findEntryEnclosures(&item)
+		entry.Enclosures = findEntryEnclosures(&item, feed.SiteURL)
 
 		// Populate the entry URL.
 		entryURL := findEntryURL(&item)
@@ -245,18 +245,30 @@ func findEntryAuthor(rssItem *RSSItem) string {
 	return strings.TrimSpace(sanitizer.StripTags(author))
 }
 
-func findEntryEnclosures(rssItem *RSSItem) model.EnclosureList {
+func findEntryEnclosures(rssItem *RSSItem, siteURL string) model.EnclosureList {
 	enclosures := make(model.EnclosureList, 0)
 	duplicates := make(map[string]bool)
 
 	for _, mediaThumbnail := range rssItem.AllMediaThumbnails() {
-		if _, found := duplicates[mediaThumbnail.URL]; !found {
-			duplicates[mediaThumbnail.URL] = true
-			enclosures = append(enclosures, &model.Enclosure{
-				URL:      mediaThumbnail.URL,
-				MimeType: mediaThumbnail.MimeType(),
-				Size:     mediaThumbnail.Size(),
-			})
+		mediaURL := strings.TrimSpace(mediaThumbnail.URL)
+		if mediaURL == "" {
+			continue
+		}
+		if _, found := duplicates[mediaURL]; !found {
+			if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
+				slog.Debug("Unable to build absolute URL for media thumbnail",
+					slog.String("url", mediaThumbnail.URL),
+					slog.String("site_url", siteURL),
+					slog.Any("error", err),
+				)
+			} else {
+				duplicates[mediaAbsoluteURL] = true
+				enclosures = append(enclosures, &model.Enclosure{
+					URL:      mediaAbsoluteURL,
+					MimeType: mediaThumbnail.MimeType(),
+					Size:     mediaThumbnail.Size(),
+				})
+			}
 		}
 	}
 
@@ -265,15 +277,20 @@ func findEntryEnclosures(rssItem *RSSItem) model.EnclosureList {
 
 		if rssItem.FeedBurnerEnclosureLink != "" {
 			filename := path.Base(rssItem.FeedBurnerEnclosureLink)
-			if strings.Contains(enclosureURL, filename) {
+			if strings.HasSuffix(enclosureURL, filename) {
 				enclosureURL = rssItem.FeedBurnerEnclosureLink
 			}
 		}
 
+		enclosureURL = strings.TrimSpace(enclosureURL)
 		if enclosureURL == "" {
 			continue
 		}
 
+		if absoluteEnclosureURL, err := urllib.AbsoluteURL(siteURL, enclosureURL); err == nil {
+			enclosureURL = absoluteEnclosureURL
+		}
+
 		if _, found := duplicates[enclosureURL]; !found {
 			duplicates[enclosureURL] = true
 
@@ -286,24 +303,50 @@ func findEntryEnclosures(rssItem *RSSItem) model.EnclosureList {
 	}
 
 	for _, mediaContent := range rssItem.AllMediaContents() {
-		if _, found := duplicates[mediaContent.URL]; !found {
-			duplicates[mediaContent.URL] = true
-			enclosures = append(enclosures, &model.Enclosure{
-				URL:      mediaContent.URL,
-				MimeType: mediaContent.MimeType(),
-				Size:     mediaContent.Size(),
-			})
+		mediaURL := strings.TrimSpace(mediaContent.URL)
+		if mediaURL == "" {
+			continue
+		}
+		if _, found := duplicates[mediaURL]; !found {
+			mediaURL := strings.TrimSpace(mediaContent.URL)
+			if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
+				slog.Debug("Unable to build absolute URL for media content",
+					slog.String("url", mediaContent.URL),
+					slog.String("site_url", siteURL),
+					slog.Any("error", err),
+				)
+			} else {
+				duplicates[mediaAbsoluteURL] = true
+				enclosures = append(enclosures, &model.Enclosure{
+					URL:      mediaAbsoluteURL,
+					MimeType: mediaContent.MimeType(),
+					Size:     mediaContent.Size(),
+				})
+			}
 		}
 	}
 
 	for _, mediaPeerLink := range rssItem.AllMediaPeerLinks() {
-		if _, found := duplicates[mediaPeerLink.URL]; !found {
-			duplicates[mediaPeerLink.URL] = true
-			enclosures = append(enclosures, &model.Enclosure{
-				URL:      mediaPeerLink.URL,
-				MimeType: mediaPeerLink.MimeType(),
-				Size:     mediaPeerLink.Size(),
-			})
+		mediaURL := strings.TrimSpace(mediaPeerLink.URL)
+		if mediaURL == "" {
+			continue
+		}
+		if _, found := duplicates[mediaURL]; !found {
+			mediaURL := strings.TrimSpace(mediaPeerLink.URL)
+			if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
+				slog.Debug("Unable to build absolute URL for media peer link",
+					slog.String("url", mediaPeerLink.URL),
+					slog.String("site_url", siteURL),
+					slog.Any("error", err),
+				)
+			} else {
+				duplicates[mediaAbsoluteURL] = true
+				enclosures = append(enclosures, &model.Enclosure{
+					URL:      mediaAbsoluteURL,
+					MimeType: mediaPeerLink.MimeType(),
+					Size:     mediaPeerLink.Size(),
+				})
+			}
 		}
 	}
 

+ 135 - 35
internal/reader/rss/parser_test.go

@@ -1016,15 +1016,11 @@ func TestParseEntryWithEnclosures(t *testing.T) {
 	}
 
 	if len(feed.Entries) != 1 {
-		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if feed.Entries[0].URL != "http://www.example.org/entries/1" {
-		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
 	}
 
 	if len(feed.Entries[0].Enclosures) != 1 {
-		t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
 	}
 
 	if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
@@ -1065,15 +1061,11 @@ func TestParseEntryWithIncorrectEnclosureLength(t *testing.T) {
 	}
 
 	if len(feed.Entries) != 1 {
-		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if feed.Entries[0].URL != "http://www.example.org/entries/1" {
-		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
 	}
 
 	if len(feed.Entries[0].Enclosures) != 2 {
-		t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
 	}
 
 	if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
@@ -1093,6 +1085,39 @@ func TestParseEntryWithIncorrectEnclosureLength(t *testing.T) {
 	}
 }
 
+// TestParseEntryWithDuplicatedEnclosureURL parses an RSS item that declares
+// the same enclosure URL twice, the second time padded with whitespace, and
+// expects the two elements to collapse into a single enclosure.
+func TestParseEntryWithDuplicatedEnclosureURL(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0">
+		<channel>
+		<title>My Podcast Feed</title>
+		<link>http://example.org</link>
+		<item>
+			<title>Podcasting with RSS</title>
+			<link>http://www.example.org/entries/1</link>
+			<enclosure url="http://www.example.org/myaudiofile.mp3" type="audio/mpeg" />
+			<enclosure url="   http://www.example.org/myaudiofile.mp3   " type="audio/mpeg" />
+		</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Abort on an unexpected count: the following checks index into the slice.
+	entries := feed.Entries
+	if count := len(entries); count != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", count)
+	}
+
+	enclosures := entries[0].Enclosures
+	if count := len(enclosures); count != 1 {
+		t.Fatalf("Incorrect number of enclosures, got: %d", count)
+	}
+
+	if got := enclosures[0].URL; got != "http://www.example.org/myaudiofile.mp3" {
+		t.Errorf("Incorrect enclosure URL, got: %s", got)
+	}
+}
+
 func TestParseEntryWithEmptyEnclosureURL(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 		<rss version="2.0">
@@ -1106,7 +1131,7 @@ func TestParseEntryWithEmptyEnclosureURL(t *testing.T) {
 			<description>An overview of RSS podcasting</description>
 			<pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
 			<guid isPermaLink="true">http://www.example.org/entries/1</guid>
-			<enclosure url="" length="0"/>
+			<enclosure url=" " length="0"/>
 		</item>
 		</channel>
 		</rss>`
@@ -1117,15 +1142,47 @@ func TestParseEntryWithEmptyEnclosureURL(t *testing.T) {
 	}
 
 	if len(feed.Entries) != 1 {
-		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
 	}
 
-	if feed.Entries[0].URL != "http://www.example.org/entries/1" {
-		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+	if len(feed.Entries[0].Enclosures) != 0 {
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
 	}
+}
 
-	if len(feed.Entries[0].Enclosures) != 0 {
-		t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+// TestParseEntryWithRelativeEnclosureURL parses an RSS item whose enclosure
+// URL is root-relative and padded with whitespace, and expects a single
+// enclosure with the URL "http://example.org/files/file.mp3".
+func TestParseEntryWithRelativeEnclosureURL(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0">
+		<channel>
+		<title>My Podcast Feed</title>
+		<link>http://example.org</link>
+		<author>some.email@example.org</author>
+		<item>
+			<title>Podcasting with RSS</title>
+			<link>http://www.example.org/entries/1</link>
+			<description>An overview of RSS podcasting</description>
+			<pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
+			<guid isPermaLink="true">http://www.example.org/entries/1</guid>
+			<enclosure url=" /files/file.mp3  "/>
+		</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Fatalf rather than Errorf: the checks below index feed.Entries[0].
+	if len(feed.Entries) != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if len(feed.Entries[0].Enclosures) != 1 {
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	// The expected URL uses the http scheme of the channel <link>, not the
+	// https scheme of the URL passed to Parse.
+	if feed.Entries[0].Enclosures[0].URL != "http://example.org/files/file.mp3" {
+		t.Errorf("Incorrect enclosure URL, got: %q", feed.Entries[0].Enclosures[0].URL)
 	}
 }
 
@@ -1154,15 +1211,11 @@ func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
 	}
 
 	if len(feed.Entries) != 1 {
-		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
-	}
-
-	if feed.Entries[0].URL != "http://www.example.org/entries/1" {
-		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
 	}
 
 	if len(feed.Entries[0].Enclosures) != 1 {
-		t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
 	}
 
 	if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" {
@@ -1178,6 +1231,42 @@ func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
 	}
 }
 
+// TestParseEntryWithFeedBurnerEnclosuresAndRelativeURL parses an RSS item
+// whose enclosure points at a FeedBurner proxy URL while the
+// feedburner:origEnclosureLink element holds a root-relative path, and expects
+// a single enclosure with the URL
+// "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3".
+func TestParseEntryWithFeedBurnerEnclosuresAndRelativeURL(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
+		<channel>
+		<title>My Example Feed</title>
+		<link>http://example.org</link>
+		<item>
+			<title>Example Item</title>
+			<link>http://www.example.org/entries/1</link>
+			<enclosure
+				url="http://feedproxy.google.com/~r/example/~5/lpMyFSCvubs/File.mp3"
+				length="76192460"
+				type="audio/mpeg" />
+			<feedburner:origEnclosureLink>/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3</feedburner:origEnclosureLink>
+		</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Fatalf rather than Errorf: the checks below index feed.Entries[0] and
+	// would panic on an empty slice if the test kept running.
+	if len(feed.Entries) != 1 {
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if len(feed.Entries[0].Enclosures) != 1 {
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" {
+		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
+	}
+}
+
 func TestParseEntryWithRelativeURL(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 		<rss version="2.0">
@@ -1389,7 +1478,7 @@ func TestParseEntryWithMediaGroup(t *testing.T) {
 		<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
 		<channel>
 		<title>My Example Feed</title>
-		<link>http://example.org</link>
+		<link>https://example.org</link>
 		<item>
 			<title>Example Item</title>
 			<link>http://www.example.org/entries/1</link>
@@ -1400,7 +1489,9 @@ func TestParseEntryWithMediaGroup(t *testing.T) {
 				<media:content type="application/x-bittorrent" url="https://example.org/file2.torrent" isDefault="true"></media:content>
 				<media:content type="application/x-bittorrent" url="https://example.org/file3.torrent"></media:content>
 				<media:content type="application/x-bittorrent" url="https://example.org/file4.torrent"></media:content>
-				<media:content type="application/x-bittorrent" url="https://example.org/file5.torrent" fileSize="42"></media:content>
+				<media:content type="application/x-bittorrent" url="https://example.org/file4.torrent"></media:content>
+				<media:content type="application/x-bittorrent" url=" file5.torrent  " fileSize="42"></media:content>
+				<media:content type="application/x-bittorrent" url="  " fileSize="42"></media:content>
 				<media:rating>nonadult</media:rating>
 			</media:group>
 			<media:thumbnail url="https://example.org/image.jpg" height="122" width="223"></media:thumbnail>
@@ -1453,15 +1544,19 @@ func TestParseEntryWithMediaContent(t *testing.T) {
 		<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
 		<channel>
 		<title>My Example Feed</title>
-		<link>http://example.org</link>
+		<link>https://example.org</link>
 		<item>
 			<title>Example Item</title>
 			<link>http://www.example.org/entries/1</link>
 			<media:thumbnail url="https://example.org/thumbnail.jpg" />
+			<media:thumbnail url="https://example.org/thumbnail.jpg" />
+			<media:thumbnail url=" thumbnail.jpg  " />
+			<media:thumbnail url="   " />
 			<media:content url="https://example.org/media1.jpg" medium="image">
 				<media:title type="html">Some Title for Media 1</media:title>
 			</media:content>
-			<media:content url="https://example.org/media2.jpg" medium="image" />
+			<media:content url="   /media2.jpg   " medium="image" />
+			<media:content url="    " medium="image" />
 		</item>
 		</channel>
 		</rss>`
@@ -1472,9 +1567,9 @@ func TestParseEntryWithMediaContent(t *testing.T) {
 	}
 
 	if len(feed.Entries) != 1 {
-		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
 	}
-	if len(feed.Entries[0].Enclosures) != 3 {
+	if len(feed.Entries[0].Enclosures) != 4 {
 		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
 	}
 
@@ -1483,6 +1578,7 @@ func TestParseEntryWithMediaContent(t *testing.T) {
 		mimeType string
 		size     int64
 	}{
+		{"https://example.org/thumbnail.jpg", "image/*", 0},
 		{"https://example.org/thumbnail.jpg", "image/*", 0},
 		{"https://example.org/media1.jpg", "image/*", 0},
 		{"https://example.org/media2.jpg", "image/*", 0},
@@ -1508,11 +1604,14 @@ func TestParseEntryWithMediaPeerLink(t *testing.T) {
 		<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
 		<channel>
 		<title>My Example Feed</title>
-		<link>http://example.org</link>
+		<link>https://website.example.org</link>
 		<item>
 			<title>Example Item</title>
 			<link>http://www.example.org/entries/1</link>
-			<media:peerLink type="application/x-bittorrent" href="http://www.example.org/file.torrent" />
+			<media:peerLink type="application/x-bittorrent" href="https://www.example.org/file.torrent" />
+			<media:peerLink type="application/x-bittorrent" href="https://www.example.org/file.torrent" />
+			<media:peerLink type="application/x-bittorrent" href="  file2.torrent   " />
+			<media:peerLink type="application/x-bittorrent" href="    " />
 		</item>
 		</channel>
 		</rss>`
@@ -1523,10 +1622,10 @@ func TestParseEntryWithMediaPeerLink(t *testing.T) {
 	}
 
 	if len(feed.Entries) != 1 {
-		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
 	}
 
-	if len(feed.Entries[0].Enclosures) != 1 {
+	if len(feed.Entries[0].Enclosures) != 2 {
 		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
 	}
 
@@ -1535,7 +1634,8 @@ func TestParseEntryWithMediaPeerLink(t *testing.T) {
 		mimeType string
 		size     int64
 	}{
-		{"http://www.example.org/file.torrent", "application/x-bittorrent", 0},
+		{"https://www.example.org/file.torrent", "application/x-bittorrent", 0},
+		{"https://website.example.org/file2.torrent", "application/x-bittorrent", 0},
 	}
 
 	for index, enclosure := range feed.Entries[0].Enclosures {