Quellcode durchsuchen

feat(processor): fetch YouTube watch time in bulk using the API

Frédéric Guillot vor 1 Jahr
Ursprung
Commit
369054b02d

+ 1 - 1
internal/reader/processor/bilibili.go

@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-package processor
+package processor // import "miniflux.app/v2/internal/reader/processor"
 
 import (
 	"encoding/json"

+ 200 - 0
internal/reader/processor/filters.go

@@ -0,0 +1,200 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package processor // import "miniflux.app/v2/internal/reader/processor"
+
+import (
+	"log/slog"
+	"regexp"
+	"slices"
+	"strings"
+	"time"
+
+	"miniflux.app/v2/internal/model"
+)
+
+func isBlockedEntry(feed *model.Feed, entry *model.Entry, user *model.User) bool {
+	if user.BlockFilterEntryRules != "" {
+		rules := strings.Split(user.BlockFilterEntryRules, "\n")
+		for _, rule := range rules {
+			parts := strings.SplitN(rule, "=", 2)
+
+			var match bool
+			switch parts[0] {
+			case "EntryDate":
+				datePattern := parts[1]
+				match = isDateMatchingPattern(entry.Date, datePattern)
+			case "EntryTitle":
+				match, _ = regexp.MatchString(parts[1], entry.Title)
+			case "EntryURL":
+				match, _ = regexp.MatchString(parts[1], entry.URL)
+			case "EntryCommentsURL":
+				match, _ = regexp.MatchString(parts[1], entry.CommentsURL)
+			case "EntryContent":
+				match, _ = regexp.MatchString(parts[1], entry.Content)
+			case "EntryAuthor":
+				match, _ = regexp.MatchString(parts[1], entry.Author)
+			case "EntryTag":
+				containsTag := slices.ContainsFunc(entry.Tags, func(tag string) bool {
+					match, _ = regexp.MatchString(parts[1], tag)
+					return match
+				})
+				if containsTag {
+					match = true
+				}
+			}
+
+			if match {
+				slog.Debug("Blocking entry based on rule",
+					slog.String("entry_url", entry.URL),
+					slog.Int64("feed_id", feed.ID),
+					slog.String("feed_url", feed.FeedURL),
+					slog.String("rule", rule),
+				)
+				return true
+			}
+		}
+	}
+
+	if feed.BlocklistRules == "" {
+		return false
+	}
+
+	compiledBlocklist, err := regexp.Compile(feed.BlocklistRules)
+	if err != nil {
+		slog.Debug("Failed on regexp compilation",
+			slog.String("pattern", feed.BlocklistRules),
+			slog.Any("error", err),
+		)
+		return false
+	}
+
+	containsBlockedTag := slices.ContainsFunc(entry.Tags, func(tag string) bool {
+		return compiledBlocklist.MatchString(tag)
+	})
+
+	if compiledBlocklist.MatchString(entry.URL) || compiledBlocklist.MatchString(entry.Title) || compiledBlocklist.MatchString(entry.Author) || containsBlockedTag {
+		slog.Debug("Blocking entry based on rule",
+			slog.String("entry_url", entry.URL),
+			slog.Int64("feed_id", feed.ID),
+			slog.String("feed_url", feed.FeedURL),
+			slog.String("rule", feed.BlocklistRules),
+		)
+		return true
+	}
+
+	return false
+}
+
+func isAllowedEntry(feed *model.Feed, entry *model.Entry, user *model.User) bool {
+	if user.KeepFilterEntryRules != "" {
+		rules := strings.Split(user.KeepFilterEntryRules, "\n")
+		for _, rule := range rules {
+			parts := strings.SplitN(rule, "=", 2)
+
+			var match bool
+			switch parts[0] {
+			case "EntryDate":
+				datePattern := parts[1]
+				match = isDateMatchingPattern(entry.Date, datePattern)
+			case "EntryTitle":
+				match, _ = regexp.MatchString(parts[1], entry.Title)
+			case "EntryURL":
+				match, _ = regexp.MatchString(parts[1], entry.URL)
+			case "EntryCommentsURL":
+				match, _ = regexp.MatchString(parts[1], entry.CommentsURL)
+			case "EntryContent":
+				match, _ = regexp.MatchString(parts[1], entry.Content)
+			case "EntryAuthor":
+				match, _ = regexp.MatchString(parts[1], entry.Author)
+			case "EntryTag":
+				containsTag := slices.ContainsFunc(entry.Tags, func(tag string) bool {
+					match, _ = regexp.MatchString(parts[1], tag)
+					return match
+				})
+				if containsTag {
+					match = true
+				}
+			}
+
+			if match {
+				slog.Debug("Allowing entry based on rule",
+					slog.String("entry_url", entry.URL),
+					slog.Int64("feed_id", feed.ID),
+					slog.String("feed_url", feed.FeedURL),
+					slog.String("rule", rule),
+				)
+				return true
+			}
+		}
+		return false
+	}
+
+	if feed.KeeplistRules == "" {
+		return true
+	}
+
+	compiledKeeplist, err := regexp.Compile(feed.KeeplistRules)
+	if err != nil {
+		slog.Debug("Failed on regexp compilation",
+			slog.String("pattern", feed.KeeplistRules),
+			slog.Any("error", err),
+		)
+		return false
+	}
+	containsAllowedTag := slices.ContainsFunc(entry.Tags, func(tag string) bool {
+		return compiledKeeplist.MatchString(tag)
+	})
+
+	if compiledKeeplist.MatchString(entry.URL) || compiledKeeplist.MatchString(entry.Title) || compiledKeeplist.MatchString(entry.Author) || containsAllowedTag {
+		slog.Debug("Allowing entry based on rule",
+			slog.String("entry_url", entry.URL),
+			slog.Int64("feed_id", feed.ID),
+			slog.String("feed_url", feed.FeedURL),
+			slog.String("rule", feed.KeeplistRules),
+		)
+		return true
+	}
+	return false
+}
+
+func isDateMatchingPattern(entryDate time.Time, pattern string) bool {
+	if pattern == "future" {
+		return entryDate.After(time.Now())
+	}
+
+	parts := strings.SplitN(pattern, ":", 2)
+	if len(parts) != 2 {
+		return false
+	}
+
+	operator := parts[0]
+	dateStr := parts[1]
+
+	switch operator {
+	case "before":
+		targetDate, err := time.Parse("2006-01-02", dateStr)
+		if err != nil {
+			return false
+		}
+		return entryDate.Before(targetDate)
+	case "after":
+		targetDate, err := time.Parse("2006-01-02", dateStr)
+		if err != nil {
+			return false
+		}
+		return entryDate.After(targetDate)
+	case "between":
+		dates := strings.Split(dateStr, ",")
+		if len(dates) != 2 {
+			return false
+		}
+		startDate, err1 := time.Parse("2006-01-02", dates[0])
+		endDate, err2 := time.Parse("2006-01-02", dates[1])
+		if err1 != nil || err2 != nil {
+			return false
+		}
+		return entryDate.After(startDate) && entryDate.Before(endDate)
+	}
+	return false
+}

+ 1 - 1
internal/reader/processor/nebula.go

@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-package processor
+package processor // import "miniflux.app/v2/internal/reader/processor"
 
 import (
 	"errors"

+ 1 - 1
internal/reader/processor/odysee.go

@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-package processor
+package processor // import "miniflux.app/v2/internal/reader/processor"
 
 import (
 	"errors"

+ 5 - 276
internal/reader/processor/processor.go

@@ -1,13 +1,11 @@
 // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-package processor
+package processor // import "miniflux.app/v2/internal/reader/processor"
 
 import (
 	"log/slog"
 	"regexp"
-	"slices"
-	"strings"
 	"time"
 
 	"github.com/tdewolff/minify/v2"
@@ -127,155 +125,15 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
 		entry.Content = sanitizer.Sanitize(pageBaseURL, entry.Content)
 
 		updateEntryReadingTime(store, feed, entry, entryIsNew, user)
-		filteredEntries = append(filteredEntries, entry)
-	}
-
-	feed.Entries = filteredEntries
-}
-
-func isBlockedEntry(feed *model.Feed, entry *model.Entry, user *model.User) bool {
-	if user.BlockFilterEntryRules != "" {
-		rules := strings.Split(user.BlockFilterEntryRules, "\n")
-		for _, rule := range rules {
-			parts := strings.SplitN(rule, "=", 2)
-
-			var match bool
-			switch parts[0] {
-			case "EntryDate":
-				datePattern := parts[1]
-				match = isDateMatchingPattern(entry.Date, datePattern)
-			case "EntryTitle":
-				match, _ = regexp.MatchString(parts[1], entry.Title)
-			case "EntryURL":
-				match, _ = regexp.MatchString(parts[1], entry.URL)
-			case "EntryCommentsURL":
-				match, _ = regexp.MatchString(parts[1], entry.CommentsURL)
-			case "EntryContent":
-				match, _ = regexp.MatchString(parts[1], entry.Content)
-			case "EntryAuthor":
-				match, _ = regexp.MatchString(parts[1], entry.Author)
-			case "EntryTag":
-				containsTag := slices.ContainsFunc(entry.Tags, func(tag string) bool {
-					match, _ = regexp.MatchString(parts[1], tag)
-					return match
-				})
-				if containsTag {
-					match = true
-				}
-			}
-
-			if match {
-				slog.Debug("Blocking entry based on rule",
-					slog.String("entry_url", entry.URL),
-					slog.Int64("feed_id", feed.ID),
-					slog.String("feed_url", feed.FeedURL),
-					slog.String("rule", rule),
-				)
-				return true
-			}
-		}
-	}
-
-	if feed.BlocklistRules == "" {
-		return false
-	}
-
-	compiledBlocklist, err := regexp.Compile(feed.BlocklistRules)
-	if err != nil {
-		slog.Debug("Failed on regexp compilation",
-			slog.String("pattern", feed.BlocklistRules),
-			slog.Any("error", err),
-		)
-		return false
-	}
-
-	containsBlockedTag := slices.ContainsFunc(entry.Tags, func(tag string) bool {
-		return compiledBlocklist.MatchString(tag)
-	})
-
-	if compiledBlocklist.MatchString(entry.URL) || compiledBlocklist.MatchString(entry.Title) || compiledBlocklist.MatchString(entry.Author) || containsBlockedTag {
-		slog.Debug("Blocking entry based on rule",
-			slog.String("entry_url", entry.URL),
-			slog.Int64("feed_id", feed.ID),
-			slog.String("feed_url", feed.FeedURL),
-			slog.String("rule", feed.BlocklistRules),
-		)
-		return true
-	}
-
-	return false
-}
-
-func isAllowedEntry(feed *model.Feed, entry *model.Entry, user *model.User) bool {
-	if user.KeepFilterEntryRules != "" {
-		rules := strings.Split(user.KeepFilterEntryRules, "\n")
-		for _, rule := range rules {
-			parts := strings.SplitN(rule, "=", 2)
-
-			var match bool
-			switch parts[0] {
-			case "EntryDate":
-				datePattern := parts[1]
-				match = isDateMatchingPattern(entry.Date, datePattern)
-			case "EntryTitle":
-				match, _ = regexp.MatchString(parts[1], entry.Title)
-			case "EntryURL":
-				match, _ = regexp.MatchString(parts[1], entry.URL)
-			case "EntryCommentsURL":
-				match, _ = regexp.MatchString(parts[1], entry.CommentsURL)
-			case "EntryContent":
-				match, _ = regexp.MatchString(parts[1], entry.Content)
-			case "EntryAuthor":
-				match, _ = regexp.MatchString(parts[1], entry.Author)
-			case "EntryTag":
-				containsTag := slices.ContainsFunc(entry.Tags, func(tag string) bool {
-					match, _ = regexp.MatchString(parts[1], tag)
-					return match
-				})
-				if containsTag {
-					match = true
-				}
-			}
 
-			if match {
-				slog.Debug("Allowing entry based on rule",
-					slog.String("entry_url", entry.URL),
-					slog.Int64("feed_id", feed.ID),
-					slog.String("feed_url", feed.FeedURL),
-					slog.String("rule", rule),
-				)
-				return true
-			}
-		}
-		return false
+		filteredEntries = append(filteredEntries, entry)
 	}
 
-	if feed.KeeplistRules == "" {
-		return true
+	if user.ShowReadingTime && shouldFetchYouTubeWatchTimeInBulk() {
+		fetchYouTubeWatchTimeInBulk(filteredEntries)
 	}
 
-	compiledKeeplist, err := regexp.Compile(feed.KeeplistRules)
-	if err != nil {
-		slog.Debug("Failed on regexp compilation",
-			slog.String("pattern", feed.KeeplistRules),
-			slog.Any("error", err),
-		)
-		return false
-	}
-	containsAllowedTag := slices.ContainsFunc(entry.Tags, func(tag string) bool {
-		return compiledKeeplist.MatchString(tag)
-	})
-
-	if compiledKeeplist.MatchString(entry.URL) || compiledKeeplist.MatchString(entry.Title) || compiledKeeplist.MatchString(entry.Author) || containsAllowedTag {
-		slog.Debug("Allow entry based on rule",
-			slog.String("entry_url", entry.URL),
-			slog.Int64("feed_id", feed.ID),
-			slog.String("feed_url", feed.FeedURL),
-			slog.String("rule", feed.KeeplistRules),
-		)
-		return true
-	}
-	return false
+	feed.Entries = filteredEntries
 }
 
 // ProcessEntryWebPage downloads the entry web page and apply rewrite rules.
@@ -358,94 +216,6 @@ func rewriteEntryURL(feed *model.Feed, entry *model.Entry) string {
 	return rewrittenURL
 }
 
-func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *model.Entry, entryIsNew bool, user *model.User) {
-	if !user.ShowReadingTime {
-		slog.Debug("Skip reading time estimation for this user", slog.Int64("user_id", user.ID))
-		return
-	}
-
-	if shouldFetchYouTubeWatchTime(entry) {
-		if entryIsNew {
-			watchTime, err := fetchYouTubeWatchTime(entry.URL)
-			if err != nil {
-				slog.Warn("Unable to fetch YouTube watch time",
-					slog.Int64("user_id", user.ID),
-					slog.Int64("entry_id", entry.ID),
-					slog.String("entry_url", entry.URL),
-					slog.Int64("feed_id", feed.ID),
-					slog.String("feed_url", feed.FeedURL),
-					slog.Any("error", err),
-				)
-			}
-			entry.ReadingTime = watchTime
-		} else {
-			entry.ReadingTime = store.GetReadTime(feed.ID, entry.Hash)
-		}
-	}
-
-	if shouldFetchNebulaWatchTime(entry) {
-		if entryIsNew {
-			watchTime, err := fetchNebulaWatchTime(entry.URL)
-			if err != nil {
-				slog.Warn("Unable to fetch Nebula watch time",
-					slog.Int64("user_id", user.ID),
-					slog.Int64("entry_id", entry.ID),
-					slog.String("entry_url", entry.URL),
-					slog.Int64("feed_id", feed.ID),
-					slog.String("feed_url", feed.FeedURL),
-					slog.Any("error", err),
-				)
-			}
-			entry.ReadingTime = watchTime
-		} else {
-			entry.ReadingTime = store.GetReadTime(feed.ID, entry.Hash)
-		}
-	}
-
-	if shouldFetchOdyseeWatchTime(entry) {
-		if entryIsNew {
-			watchTime, err := fetchOdyseeWatchTime(entry.URL)
-			if err != nil {
-				slog.Warn("Unable to fetch Odysee watch time",
-					slog.Int64("user_id", user.ID),
-					slog.Int64("entry_id", entry.ID),
-					slog.String("entry_url", entry.URL),
-					slog.Int64("feed_id", feed.ID),
-					slog.String("feed_url", feed.FeedURL),
-					slog.Any("error", err),
-				)
-			}
-			entry.ReadingTime = watchTime
-		} else {
-			entry.ReadingTime = store.GetReadTime(feed.ID, entry.Hash)
-		}
-	}
-
-	if shouldFetchBilibiliWatchTime(entry) {
-		if entryIsNew {
-			watchTime, err := fetchBilibiliWatchTime(entry.URL)
-			if err != nil {
-				slog.Warn("Unable to fetch Bilibili watch time",
-					slog.Int64("user_id", user.ID),
-					slog.Int64("entry_id", entry.ID),
-					slog.String("entry_url", entry.URL),
-					slog.Int64("feed_id", feed.ID),
-					slog.String("feed_url", feed.FeedURL),
-					slog.Any("error", err),
-				)
-			}
-			entry.ReadingTime = watchTime
-		} else {
-			entry.ReadingTime = store.GetReadTime(feed.ID, entry.Hash)
-		}
-	}
-
-	// Handle YT error case and non-YT entries.
-	if entry.ReadingTime == 0 {
-		entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
-	}
-}
-
 func isRecentEntry(entry *model.Entry) bool {
 	if config.Opts.FilterEntryMaxAgeDays() == 0 || entry.Date.After(time.Now().AddDate(0, 0, -config.Opts.FilterEntryMaxAgeDays())) {
 		return true
@@ -468,44 +238,3 @@ func minifyEntryContent(entryContent string) string {
 
 	return entryContent
 }
-
-func isDateMatchingPattern(entryDate time.Time, pattern string) bool {
-	if pattern == "future" {
-		return entryDate.After(time.Now())
-	}
-
-	parts := strings.SplitN(pattern, ":", 2)
-	if len(parts) != 2 {
-		return false
-	}
-
-	operator := parts[0]
-	dateStr := parts[1]
-
-	switch operator {
-	case "before":
-		targetDate, err := time.Parse("2006-01-02", dateStr)
-		if err != nil {
-			return false
-		}
-		return entryDate.Before(targetDate)
-	case "after":
-		targetDate, err := time.Parse("2006-01-02", dateStr)
-		if err != nil {
-			return false
-		}
-		return entryDate.After(targetDate)
-	case "between":
-		dates := strings.Split(dateStr, ",")
-		if len(dates) != 2 {
-			return false
-		}
-		startDate, err1 := time.Parse("2006-01-02", dates[0])
-		endDate, err2 := time.Parse("2006-01-02", dates[1])
-		if err1 != nil || err2 != nil {
-			return false
-		}
-		return entryDate.After(startDate) && entryDate.Before(endDate)
-	}
-	return false
-}

+ 63 - 0
internal/reader/processor/reading_time.go

@@ -0,0 +1,63 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package processor // import "miniflux.app/v2/internal/reader/processor"
+
+import (
+	"log/slog"
+
+	"miniflux.app/v2/internal/model"
+	"miniflux.app/v2/internal/reader/readingtime"
+	"miniflux.app/v2/internal/storage"
+)
+
+func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *model.Entry, entryIsNew bool, user *model.User) {
+	if !user.ShowReadingTime {
+		slog.Debug("Skip reading time estimation for this user", slog.Int64("user_id", user.ID))
+		return
+	}
+
+	// Define a type for watch time fetching functions
+	type watchTimeFetcher func(string) (int, error)
+
+	// Define watch time fetching scenarios
+	watchTimeScenarios := []struct {
+		shouldFetch func(*model.Entry) bool
+		fetchFunc   watchTimeFetcher
+		platform    string
+	}{
+		{shouldFetchYouTubeWatchTimeForSingleEntry, fetchYouTubeWatchTimeForSingleEntry, "YouTube"},
+		{shouldFetchNebulaWatchTime, fetchNebulaWatchTime, "Nebula"},
+		{shouldFetchOdyseeWatchTime, fetchOdyseeWatchTime, "Odysee"},
+		{shouldFetchBilibiliWatchTime, fetchBilibiliWatchTime, "Bilibili"},
+	}
+
+	// Iterate through scenarios and attempt to fetch watch time
+	for _, scenario := range watchTimeScenarios {
+		if scenario.shouldFetch(entry) {
+			if entryIsNew {
+				if watchTime, err := scenario.fetchFunc(entry.URL); err != nil {
+					slog.Warn("Unable to fetch watch time",
+						slog.String("platform", scenario.platform),
+						slog.Int64("user_id", user.ID),
+						slog.Int64("entry_id", entry.ID),
+						slog.String("entry_url", entry.URL),
+						slog.Int64("feed_id", feed.ID),
+						slog.String("feed_url", feed.FeedURL),
+						slog.Any("error", err),
+					)
+				} else {
+					entry.ReadingTime = watchTime
+				}
+			} else {
+				entry.ReadingTime = store.GetReadTime(feed.ID, entry.Hash)
+			}
+			break
+		}
+	}
+
+	// Fallback to text-based reading time estimation
+	if entry.ReadingTime == 0 && entry.Content != "" {
+		entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
+	}
+}

+ 89 - 49
internal/reader/processor/youtube.go

@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-package processor
+package processor // import "miniflux.app/v2/internal/reader/processor"
 
 import (
 	"encoding/json"
@@ -11,6 +11,7 @@ import (
 	"net/url"
 	"regexp"
 	"strconv"
+	"strings"
 	"time"
 
 	"github.com/PuerkitoBio/goquery"
@@ -25,24 +26,30 @@ var (
 	iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
 )
 
-func shouldFetchYouTubeWatchTime(entry *model.Entry) bool {
-	if !config.Opts.FetchYouTubeWatchTime() {
-		return false
-	}
-	matches := youtubeRegex.FindStringSubmatch(entry.URL)
-	urlMatchesYouTubePattern := len(matches) == 2
-	return urlMatchesYouTubePattern
+func isYouTubeVideoURL(websiteURL string) bool {
+	return len(youtubeRegex.FindStringSubmatch(websiteURL)) == 2
 }
 
-func fetchYouTubeWatchTime(websiteURL string) (int, error) {
-	if config.Opts.YouTubeApiKey() == "" {
-		return fetchYouTubeWatchTimeFromWebsite(websiteURL)
-	} else {
-		return fetchYouTubeWatchTimeFromApi(websiteURL)
+func getVideoIDFromYouTubeURL(websiteURL string) string {
+	parsedWebsiteURL, err := url.Parse(websiteURL)
+	if err != nil {
+		return ""
 	}
+
+	return parsedWebsiteURL.Query().Get("v")
+}
+
+func shouldFetchYouTubeWatchTimeForSingleEntry(entry *model.Entry) bool {
+	return config.Opts.FetchYouTubeWatchTime() && config.Opts.YouTubeApiKey() == "" && isYouTubeVideoURL(entry.URL)
+}
+
+func shouldFetchYouTubeWatchTimeInBulk() bool {
+	return config.Opts.FetchYouTubeWatchTime() && config.Opts.YouTubeApiKey() != ""
 }
 
-func fetchYouTubeWatchTimeFromWebsite(websiteURL string) (int, error) {
+func fetchYouTubeWatchTimeForSingleEntry(websiteURL string) (int, error) {
+	slog.Debug("Fetching YouTube watch time for a single entry", slog.String("website_url", websiteURL))
+
 	requestBuilder := fetcher.NewRequestBuilder()
 	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
 	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
@@ -60,31 +67,59 @@ func fetchYouTubeWatchTimeFromWebsite(websiteURL string) (int, error) {
 		return 0, docErr
 	}
 
-	durs, exists := doc.FindMatcher(goquery.Single(`meta[itemprop="duration"]`)).Attr("content")
+	htmlDuration, exists := doc.FindMatcher(goquery.Single(`meta[itemprop="duration"]`)).Attr("content")
 	if !exists {
-		return 0, errors.New("duration has not found")
+		return 0, errors.New("youtube: duration not found")
 	}
 
-	dur, err := parseISO8601(durs)
+	parsedDuration, err := parseISO8601(htmlDuration)
 	if err != nil {
-		return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
+		return 0, fmt.Errorf("youtube: unable to parse duration %s: %v", htmlDuration, err)
 	}
 
-	return int(dur.Minutes()), nil
+	return int(parsedDuration.Minutes()), nil
 }
 
-func fetchYouTubeWatchTimeFromApi(websiteURL string) (int, error) {
-	requestBuilder := fetcher.NewRequestBuilder()
-	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
-	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
+func fetchYouTubeWatchTimeInBulk(entries []*model.Entry) {
+	var videosEntriesMapping = make(map[string]*model.Entry)
+	var videoIDs []string
 
-	parsedWebsiteURL, err := url.Parse(websiteURL)
+	for _, entry := range entries {
+		if !isYouTubeVideoURL(entry.URL) {
+			continue
+		}
+
+		youtubeVideoID := getVideoIDFromYouTubeURL(entry.URL)
+		if youtubeVideoID == "" {
+			continue
+		}
+
+		videosEntriesMapping[youtubeVideoID] = entry
+		videoIDs = append(videoIDs, youtubeVideoID)
+	}
+
+	if len(videoIDs) == 0 {
+		return
+	}
+
+	watchTimeMap, err := fetchYouTubeWatchTimeFromApiInBulk(videoIDs)
 	if err != nil {
-		return 0, fmt.Errorf("unable to parse URL: %v", err)
+		slog.Warn("Unable to fetch YouTube watch time in bulk", slog.Any("error", err))
+		return
+	}
+
+	for videoID, watchTime := range watchTimeMap {
+		if entry, ok := videosEntriesMapping[videoID]; ok {
+			entry.ReadingTime = int(watchTime.Minutes())
+		}
 	}
+}
+
+func fetchYouTubeWatchTimeFromApiInBulk(videoIDs []string) (map[string]time.Duration, error) {
+	slog.Debug("Fetching YouTube watch time in bulk", slog.Any("video_ids", videoIDs))
 
 	apiQuery := url.Values{}
-	apiQuery.Set("id", parsedWebsiteURL.Query().Get("v"))
+	apiQuery.Set("id", strings.Join(videoIDs, ","))
 	apiQuery.Set("key", config.Opts.YouTubeApiKey())
 	apiQuery.Set("part", "contentDetails")
 
@@ -95,37 +130,33 @@ func fetchYouTubeWatchTimeFromApi(websiteURL string) (int, error) {
 		RawQuery: apiQuery.Encode(),
 	}
 
+	requestBuilder := fetcher.NewRequestBuilder()
+	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
+	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
+
 	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(apiURL.String()))
 	defer responseHandler.Close()
 
 	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
-		slog.Warn("Unable to fetch contentDetails from YouTube API", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
-		return 0, localizedError.Error()
-	}
-
-	var videos struct {
-		Items []struct {
-			ContentDetails struct {
-				Duration string `json:"duration"`
-			} `json:"contentDetails"`
-		} `json:"items"`
+		slog.Warn("Unable to fetch contentDetails from YouTube API", slog.Any("error", localizedError.Error()))
+		return nil, localizedError.Error()
 	}
 
+	var videos youtubeVideoListResponse
 	if err := json.NewDecoder(responseHandler.Body(config.Opts.HTTPClientMaxBodySize())).Decode(&videos); err != nil {
-		return 0, fmt.Errorf("unable to decode JSON: %v", err)
-	}
-
-	if n := len(videos.Items); n != 1 {
-		return 0, fmt.Errorf("invalid items length: %d", n)
+		return nil, fmt.Errorf("youtube: unable to decode JSON: %v", err)
 	}
 
-	durs := videos.Items[0].ContentDetails.Duration
-	dur, err := parseISO8601(durs)
-	if err != nil {
-		return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
+	watchTimeMap := make(map[string]time.Duration)
+	for _, video := range videos.Items {
+		duration, err := parseISO8601(video.ContentDetails.Duration)
+		if err != nil {
+			slog.Warn("Unable to parse ISO8601 duration", slog.Any("error", err))
+			continue
+		}
+		watchTimeMap[video.ID] = duration
 	}
-
-	return int(dur.Minutes()), nil
+	return watchTimeMap, nil
 }
 
 func parseISO8601(from string) (time.Duration, error) {
@@ -135,7 +166,7 @@ func parseISO8601(from string) (time.Duration, error) {
 	if iso8601Regex.MatchString(from) {
 		match = iso8601Regex.FindStringSubmatch(from)
 	} else {
-		return 0, errors.New("could not parse duration string")
+		return 0, errors.New("youtube: could not parse duration string")
 	}
 
 	for i, name := range iso8601Regex.SubexpNames() {
@@ -157,9 +188,18 @@ func parseISO8601(from string) (time.Duration, error) {
 		case "second":
 			d += time.Duration(val) * time.Second
 		default:
-			return 0, fmt.Errorf("unknown field %s", name)
+			return 0, fmt.Errorf("youtube: unknown field %s", name)
 		}
 	}
 
 	return d, nil
 }
+
+type youtubeVideoListResponse struct {
+	Items []struct {
+		ID             string `json:"id"`
+		ContentDetails struct {
+			Duration string `json:"duration"`
+		} `json:"contentDetails"`
+	} `json:"items"`
+}

+ 34 - 0
internal/reader/processor/youtube_test.go

@@ -36,3 +36,37 @@ func TestParseISO8601(t *testing.T) {
 		}
 	}
 }
+
+func TestGetYouTubeVideoIDFromURL(t *testing.T) {
+	scenarios := []struct {
+		url      string
+		expected string
+	}{
+		{"https://www.youtube.com/watch?v=HLrqNhgdiC0", "HLrqNhgdiC0"},
+		{"https://www.youtube.com/watch?v=HLrqNhgdiC0&feature=youtu.be", "HLrqNhgdiC0"},
+		{"https://example.org/test", ""},
+	}
+	for _, tc := range scenarios {
+		result := getVideoIDFromYouTubeURL(tc.url)
+		if tc.expected != result {
+			t.Errorf(`Unexpected result, got %q for url %q`, result, tc.url)
+		}
+	}
+}
+
+func TestIsYouTubeVideoURL(t *testing.T) {
+	scenarios := []struct {
+		url      string
+		expected bool
+	}{
+		{"https://www.youtube.com/watch?v=HLrqNhgdiC0", true},
+		{"https://www.youtube.com/watch?v=HLrqNhgdiC0&feature=youtu.be", true},
+		{"https://example.org/test", false},
+	}
+	for _, tc := range scenarios {
+		result := isYouTubeVideoURL(tc.url)
+		if tc.expected != result {
+			t.Errorf(`Unexpected result, got %v for url %q`, result, tc.url)
+		}
+	}
+}