Browse Source

feat: take `Retry-After` header into consideration for rate limited feeds

Frédéric Guillot 1 year ago
parent
commit
e1050e21b5

+ 11 - 10
internal/model/feed.go

@@ -112,12 +112,13 @@ func (f *Feed) CheckedNow() {
 }
 
 // ScheduleNextCheck set "next_check_at" of a feed based on the scheduler selected from the configuration.
-func (f *Feed) ScheduleNextCheck(weeklyCount int, newTTL int) {
-	f.TTL = newTTL
+func (f *Feed) ScheduleNextCheck(weeklyCount int, refreshDelayInMinutes int) {
+	f.TTL = refreshDelayInMinutes
+
 	// Default to the global config Polling Frequency.
-	var intervalMinutes int
-	switch config.Opts.PollingScheduler() {
-	case SchedulerEntryFrequency:
+	intervalMinutes := config.Opts.SchedulerRoundRobinMinInterval()
+
+	if config.Opts.PollingScheduler() == SchedulerEntryFrequency {
 		if weeklyCount <= 0 {
 			intervalMinutes = config.Opts.SchedulerEntryFrequencyMaxInterval()
 		} else {
@@ -125,13 +126,13 @@ func (f *Feed) ScheduleNextCheck(weeklyCount int, newTTL int) {
 			intervalMinutes = int(math.Min(float64(intervalMinutes), float64(config.Opts.SchedulerEntryFrequencyMaxInterval())))
 			intervalMinutes = int(math.Max(float64(intervalMinutes), float64(config.Opts.SchedulerEntryFrequencyMinInterval())))
 		}
-	default:
-		intervalMinutes = config.Opts.SchedulerRoundRobinMinInterval()
 	}
-	// If the feed has a TTL defined, we use it to make sure we don't check it too often.
-	if newTTL > intervalMinutes && newTTL > 0 {
-		intervalMinutes = newTTL
+
+	// If the feed has a TTL or a Retry-After defined, we use it to make sure we don't check it too often.
+	if refreshDelayInMinutes > 0 && refreshDelayInMinutes > intervalMinutes {
+		intervalMinutes = refreshDelayInMinutes
 	}
+
 	f.NextCheckAt = time.Now().Add(time.Minute * time.Duration(intervalMinutes))
 }
 

+ 57 - 13
internal/model/feed_test.go

@@ -14,7 +14,7 @@ import (
 
 const (
 	largeWeeklyCount = 10080
-	noNewTTL         = 0
+	noRefreshDelay   = 0
 )
 
 func TestFeedCategorySetter(t *testing.T) {
@@ -76,7 +76,7 @@ func checkTargetInterval(t *testing.T, feed *Feed, targetInterval int, timeBefor
 	}
 }
 
-func TestFeedScheduleNextCheckDefault(t *testing.T) {
+func TestFeedScheduleNextCheckRoundRobinDefault(t *testing.T) {
 	os.Clearenv()
 
 	var err error
@@ -88,15 +88,60 @@ func TestFeedScheduleNextCheckDefault(t *testing.T) {
 
 	timeBefore := time.Now()
 	feed := &Feed{}
-	weeklyCount := 10
-	feed.ScheduleNextCheck(weeklyCount, noNewTTL)
+	feed.ScheduleNextCheck(0, noRefreshDelay)
 
 	if feed.NextCheckAt.IsZero() {
 		t.Error(`The next_check_at must be set`)
 	}
 
 	targetInterval := config.Opts.SchedulerRoundRobinMinInterval()
-	checkTargetInterval(t, feed, targetInterval, timeBefore, "default SchedulerRoundRobinMinInterval")
+	checkTargetInterval(t, feed, targetInterval, timeBefore, "TestFeedScheduleNextCheckRoundRobinDefault")
+}
+
+func TestFeedScheduleNextCheckRoundRobinWithRefreshDelayAboveMinInterval(t *testing.T) {
+	os.Clearenv()
+
+	var err error
+	parser := config.NewParser()
+	config.Opts, err = parser.ParseEnvironmentVariables()
+	if err != nil {
+		t.Fatalf(`Parsing failure: %v`, err)
+	}
+
+	timeBefore := time.Now()
+	feed := &Feed{}
+
+	feed.ScheduleNextCheck(0, config.Opts.SchedulerRoundRobinMinInterval()+30)
+
+	if feed.NextCheckAt.IsZero() {
+		t.Error(`The next_check_at must be set`)
+	}
+
+	expectedInterval := config.Opts.SchedulerRoundRobinMinInterval() + 30
+	checkTargetInterval(t, feed, expectedInterval, timeBefore, "TestFeedScheduleNextCheckRoundRobinWithRefreshDelayAboveMinInterval")
+}
+
+func TestFeedScheduleNextCheckRoundRobinWithRefreshDelayBelowMinInterval(t *testing.T) {
+	os.Clearenv()
+
+	var err error
+	parser := config.NewParser()
+	config.Opts, err = parser.ParseEnvironmentVariables()
+	if err != nil {
+		t.Fatalf(`Parsing failure: %v`, err)
+	}
+
+	timeBefore := time.Now()
+	feed := &Feed{}
+
+	feed.ScheduleNextCheck(0, config.Opts.SchedulerRoundRobinMinInterval()-30)
+
+	if feed.NextCheckAt.IsZero() {
+		t.Error(`The next_check_at must be set`)
+	}
+
+	expectedInterval := config.Opts.SchedulerRoundRobinMinInterval()
+	checkTargetInterval(t, feed, expectedInterval, timeBefore, "TestFeedScheduleNextCheckRoundRobinWithRefreshDelayBelowMinInterval")
 }
 
 func TestFeedScheduleNextCheckRoundRobinMinInterval(t *testing.T) {
@@ -114,15 +159,14 @@ func TestFeedScheduleNextCheckRoundRobinMinInterval(t *testing.T) {
 
 	timeBefore := time.Now()
 	feed := &Feed{}
-	weeklyCount := 100
-	feed.ScheduleNextCheck(weeklyCount, noNewTTL)
+	feed.ScheduleNextCheck(0, noRefreshDelay)
 
 	if feed.NextCheckAt.IsZero() {
 		t.Error(`The next_check_at must be set`)
 	}
 
-	targetInterval := minInterval
-	checkTargetInterval(t, feed, targetInterval, timeBefore, "round robin min interval")
+	expectedInterval := minInterval
+	checkTargetInterval(t, feed, expectedInterval, timeBefore, "TestFeedScheduleNextCheckRoundRobinMinInterval")
 }
 
 func TestFeedScheduleNextCheckEntryFrequencyMaxInterval(t *testing.T) {
@@ -144,7 +188,7 @@ func TestFeedScheduleNextCheckEntryFrequencyMaxInterval(t *testing.T) {
 	feed := &Feed{}
 	// Use a very small weekly count to trigger the max interval
 	weeklyCount := 1
-	feed.ScheduleNextCheck(weeklyCount, noNewTTL)
+	feed.ScheduleNextCheck(weeklyCount, noRefreshDelay)
 
 	if feed.NextCheckAt.IsZero() {
 		t.Error(`The next_check_at must be set`)
@@ -173,7 +217,7 @@ func TestFeedScheduleNextCheckEntryFrequencyMaxIntervalZeroWeeklyCount(t *testin
 	feed := &Feed{}
 	// Use a very small weekly count to trigger the max interval
 	weeklyCount := 0
-	feed.ScheduleNextCheck(weeklyCount, noNewTTL)
+	feed.ScheduleNextCheck(weeklyCount, noRefreshDelay)
 
 	if feed.NextCheckAt.IsZero() {
 		t.Error(`The next_check_at must be set`)
@@ -202,7 +246,7 @@ func TestFeedScheduleNextCheckEntryFrequencyMinInterval(t *testing.T) {
 	feed := &Feed{}
 	// Use a very large weekly count to trigger the min interval
 	weeklyCount := largeWeeklyCount
-	feed.ScheduleNextCheck(weeklyCount, noNewTTL)
+	feed.ScheduleNextCheck(weeklyCount, noRefreshDelay)
 
 	if feed.NextCheckAt.IsZero() {
 		t.Error(`The next_check_at must be set`)
@@ -228,7 +272,7 @@ func TestFeedScheduleNextCheckEntryFrequencyFactor(t *testing.T) {
 	timeBefore := time.Now()
 	feed := &Feed{}
 	weeklyCount := 7
-	feed.ScheduleNextCheck(weeklyCount, noNewTTL)
+	feed.ScheduleNextCheck(weeklyCount, noRefreshDelay)
 
 	if feed.NextCheckAt.IsZero() {
 		t.Error(`The next_check_at must be set`)

+ 22 - 0
internal/reader/fetcher/response_handler.go

@@ -13,7 +13,9 @@ import (
 	"net/http"
 	"net/url"
 	"os"
+	"strconv"
 	"strings"
+	"time"
 
 	"miniflux.app/v2/internal/locale"
 )
@@ -51,6 +53,26 @@ func (r *ResponseHandler) ETag() string {
 	return r.httpResponse.Header.Get("ETag")
 }
 
+func (r *ResponseHandler) ParseRetryDelay() int {
+	retryAfterHeaderValue := r.httpResponse.Header.Get("Retry-After")
+	if retryAfterHeaderValue != "" {
+		// First, try to parse as an integer (number of seconds)
+		if seconds, err := strconv.Atoi(retryAfterHeaderValue); err == nil {
+			return seconds
+		}
+
+		// If not an integer, try to parse as an HTTP-date
+		if t, err := time.Parse(time.RFC1123, retryAfterHeaderValue); err == nil {
+			return int(time.Until(t).Seconds())
+		}
+	}
+	return 0
+}
+
+func (r *ResponseHandler) IsRateLimited() bool {
+	return r.httpResponse.StatusCode == http.StatusTooManyRequests
+}
+
 func (r *ResponseHandler) IsModified(lastEtagValue, lastModifiedValue string) bool {
 	if r.httpResponse.StatusCode == http.StatusNotModified {
 		return false

+ 35 - 0
internal/reader/fetcher/response_handler_test.go

@@ -6,6 +6,7 @@ package fetcher // import "miniflux.app/v2/internal/reader/fetcher"
 import (
 	"net/http"
 	"testing"
+	"time"
 )
 
 func TestIsModified(t *testing.T) {
@@ -67,3 +68,37 @@ func TestIsModified(t *testing.T) {
 		})
 	}
 }
+
+func TestRetryDelay(t *testing.T) {
+	var testCases = map[string]struct {
+		RetryAfterHeader string
+		ExpectedDelay    int
+	}{
+		"Empty header": {
+			RetryAfterHeader: "",
+			ExpectedDelay:    0,
+		},
+		"Integer value": {
+			RetryAfterHeader: "42",
+			ExpectedDelay:    42,
+		},
+		"HTTP-date": {
+			RetryAfterHeader: time.Now().Add(42 * time.Second).Format(time.RFC1123),
+			ExpectedDelay:    41,
+		},
+	}
+	for name, tc := range testCases {
+		t.Run(name, func(tt *testing.T) {
+			header := http.Header{}
+			header.Add("Retry-After", tc.RetryAfterHeader)
+			rh := ResponseHandler{
+				httpResponse: &http.Response{
+					Header: header,
+				},
+			}
+			if tc.ExpectedDelay != rh.ParseRetryDelay() {
+				tt.Errorf("Expected %d, got %d for scenario %q", tc.ExpectedDelay, rh.ParseRetryDelay(), name)
+			}
+		})
+	}
+}

+ 18 - 5
internal/reader/handler/handler.go

@@ -210,7 +210,7 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
 	}
 
 	weeklyEntryCount := 0
-	newTTL := 0
+	refreshDelayInMinutes := 0
 	if config.Opts.PollingScheduler() == model.SchedulerEntryFrequency {
 		var weeklyCountErr error
 		weeklyEntryCount, weeklyCountErr = store.WeeklyFeedEntryCount(userID, feedID)
@@ -220,7 +220,7 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
 	}
 
 	originalFeed.CheckedNow()
-	originalFeed.ScheduleNextCheck(weeklyEntryCount, newTTL)
+	originalFeed.ScheduleNextCheck(weeklyEntryCount, refreshDelayInMinutes)
 
 	requestBuilder := fetcher.NewRequestBuilder()
 	requestBuilder.WithUsernameAndPassword(originalFeed.Username, originalFeed.Password)
@@ -241,6 +241,19 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
 	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(originalFeed.FeedURL))
 	defer responseHandler.Close()
 
+	if responseHandler.IsRateLimited() {
+		retryDelayInSeconds := responseHandler.ParseRetryDelay()
+		refreshDelayInMinutes = retryDelayInSeconds / 60
+		originalFeed.ScheduleNextCheck(weeklyEntryCount, refreshDelayInMinutes)
+
+		slog.Warn("Feed is rate limited",
+			slog.String("feed_url", originalFeed.FeedURL),
+			slog.Int("retry_delay_in_seconds", retryDelayInSeconds),
+			slog.Int("refresh_delay_in_minutes", refreshDelayInMinutes),
+			slog.Time("new_next_check_at", originalFeed.NextCheckAt),
+		)
+	}
+
 	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
 		slog.Warn("Unable to fetch feed", slog.String("feed_url", originalFeed.FeedURL), slog.Any("error", localizedError.Error()))
 		originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language))
@@ -283,15 +296,15 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
 		}
 
 		// If the feed has a TTL defined, we use it to make sure we don't check it too often.
-		newTTL = updatedFeed.TTL
+		refreshDelayInMinutes = updatedFeed.TTL
 
 		// Set the next check at with updated arguments.
-		originalFeed.ScheduleNextCheck(weeklyEntryCount, newTTL)
+		originalFeed.ScheduleNextCheck(weeklyEntryCount, refreshDelayInMinutes)
 
 		slog.Debug("Updated next check date",
 			slog.Int64("user_id", userID),
 			slog.Int64("feed_id", feedID),
-			slog.Int("ttl", newTTL),
+			slog.Int("refresh_delay_in_minutes", refreshDelayInMinutes),
 			slog.Time("new_next_check_at", originalFeed.NextCheckAt),
 		)