Browse Source

refactor(subscription): combine `findSubscriptionsFromYouTubeChannelPage` and `findSubscriptionsFromYouTubePlaylistPage` functions

Julien Voisin 7 months ago
parent
commit
0b93d8abcc
2 changed files with 25 additions and 132 deletions
  1. 17 40
      internal/reader/subscription/finder.go
  2. 8 92
      internal/reader/subscription/finder_test.go

+ 17 - 40
internal/reader/subscription/finder.go

@@ -70,24 +70,15 @@ func (f *subscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string,
 	}
 
 	// Step 2) Check if the website URL is a YouTube channel.
-	slog.Debug("Try to detect feeds from YouTube channel page", slog.String("website_url", websiteURL))
-	if subscriptions, localizedError := f.findSubscriptionsFromYouTubeChannelPage(websiteURL); localizedError != nil {
+	slog.Debug("Try to detect feeds for a YouTube page", slog.String("website_url", websiteURL))
+	if subscriptions, localizedError := f.findSubscriptionsFromYouTube(websiteURL); localizedError != nil {
 		return nil, localizedError
 	} else if len(subscriptions) > 0 {
-		slog.Debug("Subscriptions found from YouTube channel page", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions))
+		slog.Debug("Subscriptions found from YouTube page", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions))
 		return subscriptions, nil
 	}
 
-	// Step 3) Check if the website URL is a YouTube playlist.
-	slog.Debug("Try to detect feeds from YouTube playlist page", slog.String("website_url", websiteURL))
-	if subscriptions, localizedError := f.findSubscriptionsFromYouTubePlaylistPage(websiteURL); localizedError != nil {
-		return nil, localizedError
-	} else if len(subscriptions) > 0 {
-		slog.Debug("Subscriptions found from YouTube playlist page", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions))
-		return subscriptions, nil
-	}
-
-	// Step 4) Parse web page to find feeds from HTML meta tags.
+	// Step 3) Parse web page to find feeds from HTML meta tags.
 	slog.Debug("Try to detect feeds from HTML meta tags",
 		slog.String("website_url", websiteURL),
 		slog.String("content_type", responseHandler.ContentType()),
@@ -99,7 +90,7 @@ func (f *subscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string,
 		return subscriptions, nil
 	}
 
-	// Step 5) Check if the website URL can use RSS-Bridge.
+	// Step 4) Check if the website URL can use RSS-Bridge.
 	if rssBridgeURL != "" {
 		slog.Debug("Try to detect feeds with RSS-Bridge", slog.String("website_url", websiteURL))
 		if subscriptions, localizedError := f.findSubscriptionsFromRSSBridge(websiteURL, rssBridgeURL, rssBridgeToken); localizedError != nil {
@@ -110,7 +101,7 @@ func (f *subscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string,
 		}
 	}
 
-	// Step 6) Check if the website has a known feed URL.
+	// Step 5) Check if the website has a known feed URL.
 	slog.Debug("Try to detect feeds from well-known URLs", slog.String("website_url", websiteURL))
 	if subscriptions, localizedError := f.findSubscriptionsFromWellKnownURLs(websiteURL); localizedError != nil {
 		return nil, localizedError
@@ -282,40 +273,26 @@ func (f *subscriptionFinder) findSubscriptionsFromRSSBridge(websiteURL, rssBridg
 	return subscriptions, nil
 }
 
-func (f *subscriptionFinder) findSubscriptionsFromYouTubeChannelPage(websiteURL string) (Subscriptions, *locale.LocalizedErrorWrapper) {
-	decodedUrl, err := url.Parse(websiteURL)
+func (f *subscriptionFinder) findSubscriptionsFromYouTube(websiteURL string) (Subscriptions, *locale.LocalizedErrorWrapper) {
+	decodedURL, err := url.Parse(websiteURL)
 	if err != nil {
 		return nil, locale.NewLocalizedErrorWrapper(err, "error.invalid_site_url", err)
 	}
 
-	if !strings.HasSuffix(decodedUrl.Host, "youtube.com") {
-		slog.Debug("This website is not a YouTube page, the regex doesn't match", slog.String("website_url", websiteURL))
+	if !strings.HasSuffix(decodedURL.Host, "youtube.com") {
+		slog.Debug("YouTube feed discovery skipped: not a YouTube domain", slog.String("website_url", websiteURL))
 		return nil, nil
 	}
-
-	if _, channelID, found := strings.Cut(decodedUrl.Path, "channel/"); found {
+	if _, channelID, found := strings.Cut(decodedURL.Path, "channel/"); found {
 		feedURL := "https://www.youtube.com/feeds/videos.xml?channel_id=" + channelID
-		return Subscriptions{NewSubscription(websiteURL, feedURL, parser.FormatAtom)}, nil
+		return Subscriptions{NewSubscription(decodedURL.String(), feedURL, parser.FormatAtom)}, nil
 	}
 
-	return nil, nil
-}
-
-func (f *subscriptionFinder) findSubscriptionsFromYouTubePlaylistPage(websiteURL string) (Subscriptions, *locale.LocalizedErrorWrapper) {
-	decodedUrl, err := url.Parse(websiteURL)
-	if err != nil {
-		return nil, locale.NewLocalizedErrorWrapper(err, "error.invalid_site_url", err)
-	}
-
-	if !strings.HasSuffix(decodedUrl.Host, "youtube.com") {
-		slog.Debug("This website is not a YouTube page, the regex doesn't match", slog.String("website_url", websiteURL))
-		return nil, nil
-	}
-
-	if (strings.HasPrefix(decodedUrl.Path, "/watch") && decodedUrl.Query().Has("list")) || strings.HasPrefix(decodedUrl.Path, "/playlist") {
-		playlistID := decodedUrl.Query().Get("list")
-		feedURL := "https://www.youtube.com/feeds/videos.xml?playlist_id=" + playlistID
-		return Subscriptions{NewSubscription(websiteURL, feedURL, parser.FormatAtom)}, nil
+	if strings.HasPrefix(decodedURL.Path, "/watch") || strings.HasPrefix(decodedURL.Path, "/playlist") {
+		if playlistID := decodedURL.Query().Get("list"); playlistID != "" {
+			feedURL := "https://www.youtube.com/feeds/videos.xml?playlist_id=" + playlistID
+			return Subscriptions{NewSubscription(decodedURL.String(), feedURL, parser.FormatAtom)}, nil
+		}
 	}
 
 	return nil, nil

+ 8 - 92
internal/reader/subscription/finder_test.go

@@ -8,7 +8,7 @@ import (
 	"testing"
 )
 
-func TestFindYoutubePlaylistFeed(t *testing.T) {
+func TestFindYoutubeFeed(t *testing.T) {
 	type testResult struct {
 		websiteURL     string
 		feedURL        string
@@ -34,7 +34,7 @@ func TestFindYoutubePlaylistFeed(t *testing.T) {
 		// Channel URL
 		{
 			websiteURL: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
-			feedURL:    "",
+			feedURL:    "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw",
 		},
 		// Channel URL with name
 		{
@@ -56,93 +56,9 @@ func TestFindYoutubePlaylistFeed(t *testing.T) {
 			websiteURL: "https://www.youtube.com/watch?v=6IutBmRJNLk&list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR&index=4",
 			feedURL:    "https://www.youtube.com/feeds/videos.xml?playlist_id=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
 		},
-		// Non-Youtube URL
+		// Empty playlist ID parameter
 		{
-			websiteURL: "https://www.example.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
-			feedURL:    "",
-		},
-		// Invalid URL
-		{
-			websiteURL:     "https://example|org/",
-			feedURL:        "",
-			discoveryError: true,
-		},
-	}
-
-	for _, scenario := range scenarios {
-		subscriptions, localizedError := NewSubscriptionFinder(nil).findSubscriptionsFromYouTubePlaylistPage(scenario.websiteURL)
-		if scenario.discoveryError {
-			if localizedError == nil {
-				t.Fatalf(`Parsing an invalid URL should return an error`)
-			}
-		}
-
-		if scenario.feedURL == "" {
-			if len(subscriptions) > 0 {
-				t.Fatalf(`Parsing a non-playlist URL should not return any subscription: %q`, scenario.websiteURL)
-			}
-		} else {
-			if localizedError != nil {
-				t.Fatalf(`Parsing a correctly formatted YouTube playlist page should not return any error: %v`, localizedError)
-			}
-
-			if len(subscriptions) != 1 {
-				t.Fatalf(`Incorrect number of subscriptions returned`)
-			}
-
-			if subscriptions[0].URL != scenario.feedURL {
-				t.Errorf(`Unexpected Feed, got %s, instead of %s`, subscriptions[0].URL, scenario.feedURL)
-			}
-		}
-	}
-}
-
-func TestFindYoutubeChannelFeed(t *testing.T) {
-	type testResult struct {
-		websiteURL     string
-		feedURL        string
-		discoveryError bool
-	}
-
-	scenarios := []testResult{
-		// Video URL
-		{
-			websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
-			feedURL:    "",
-		},
-		// Video URL with position argument
-		{
-			websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=1",
-			feedURL:    "",
-		},
-		// Video URL with position argument
-		{
-			websiteURL: "https://www.youtube.com/watch?t=1&v=dQw4w9WgXcQ",
-			feedURL:    "",
-		},
-		// Channel URL
-		{
-			websiteURL: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
-			feedURL:    "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw",
-		},
-		// Channel URL with name
-		{
-			websiteURL: "https://www.youtube.com/@ABCDEFG",
-			feedURL:    "",
-		},
-		// Playlist URL
-		{
-			websiteURL: "https://www.youtube.com/playlist?list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
-			feedURL:    "",
-		},
-		// Playlist URL with video ID
-		{
-			websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM",
-			feedURL:    "",
-		},
-		// Playlist URL with video ID and index argument
-		{
-			websiteURL: "https://www.youtube.com/watch?v=6IutBmRJNLk&list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR&index=4",
+			websiteURL: "https://www.youtube.com/playlist?list=",
 			feedURL:    "",
 		},
 		// Non-Youtube URL
@@ -159,7 +75,7 @@ func TestFindYoutubeChannelFeed(t *testing.T) {
 	}
 
 	for _, scenario := range scenarios {
-		subscriptions, localizedError := NewSubscriptionFinder(nil).findSubscriptionsFromYouTubeChannelPage(scenario.websiteURL)
+		subscriptions, localizedError := NewSubscriptionFinder(nil).findSubscriptionsFromYouTube(scenario.websiteURL)
 		if scenario.discoveryError {
 			if localizedError == nil {
 				t.Fatalf(`Parsing an invalid URL should return an error`)
@@ -168,11 +84,11 @@ func TestFindYoutubeChannelFeed(t *testing.T) {
 
 		if scenario.feedURL == "" {
 			if len(subscriptions) > 0 {
-				t.Fatalf(`Parsing a non-channel URL should not return any subscription: %q`, scenario.websiteURL)
+				t.Fatalf(`Parsing an invalid URL should not return any subscription: %q -> %v`, scenario.websiteURL, subscriptions)
 			}
 		} else {
 			if localizedError != nil {
-				t.Fatalf(`Parsing a correctly formatted YouTube channel page should not return any error: %v`, localizedError)
+				t.Fatalf(`Parsing a correctly formatted YouTube playlist or channel page should not return any error: %v`, localizedError)
 			}
 
 			if len(subscriptions) != 1 {
@@ -180,7 +96,7 @@ func TestFindYoutubeChannelFeed(t *testing.T) {
 			}
 
 			if subscriptions[0].URL != scenario.feedURL {
-				t.Errorf(`Unexpected Feed, got %s, instead of %s`, subscriptions[0].URL, scenario.feedURL)
+				t.Errorf(`Unexpected feed, got %s, instead of %s`, subscriptions[0].URL, scenario.feedURL)
 			}
 		}
 	}