Browse Source

feat: use `Cache-Control` max-age and `Expires` headers to calculate next check

Frédéric Guillot 1 year ago
parent
commit
c45b51d1f8

+ 28 - 0
internal/reader/fetcher/response_handler.go

@@ -9,6 +9,7 @@ import (
 	"fmt"
 	"io"
 	"log/slog"
+	"math"
 	"net"
 	"net/http"
 	"net/url"
@@ -53,6 +54,33 @@ func (r *ResponseHandler) ETag() string {
 	return r.httpResponse.Header.Get("ETag")
 }
 
+func (r *ResponseHandler) ExpiresInMinutes() int {
+	expiresHeaderValue := r.httpResponse.Header.Get("Expires")
+	if expiresHeaderValue != "" {
+		t, err := time.Parse(time.RFC1123, expiresHeaderValue)
+		if err == nil {
+			return int(math.Ceil(time.Until(t).Minutes()))
+		}
+	}
+	return 0
+}
+
+func (r *ResponseHandler) CacheControlMaxAgeInMinutes() int {
+	cacheControlHeaderValue := r.httpResponse.Header.Get("Cache-Control")
+	if cacheControlHeaderValue != "" {
+		for _, directive := range strings.Split(cacheControlHeaderValue, ",") {
+			directive = strings.TrimSpace(directive)
+			if strings.HasPrefix(directive, "max-age=") {
+				maxAge, err := strconv.Atoi(strings.TrimPrefix(directive, "max-age="))
+				if err == nil {
+					return int(math.Ceil(float64(maxAge) / 60))
+				}
+			}
+		}
+	}
+	return 0
+}
+
 func (r *ResponseHandler) ParseRetryDelay() int {
 	retryAfterHeaderValue := r.httpResponse.Header.Get("Retry-After")
 	if retryAfterHeaderValue != "" {

+ 72 - 0
internal/reader/fetcher/response_handler_test.go

@@ -102,3 +102,75 @@ func TestRetryDelay(t *testing.T) {
 		})
 	}
 }
+
+func TestExpiresInMinutes(t *testing.T) {
+	var testCases = map[string]struct {
+		ExpiresHeader   string
+		ExpectedMinutes int
+	}{
+		"Empty header": {
+			ExpiresHeader:   "",
+			ExpectedMinutes: 0,
+		},
+		"Valid Expires header": {
+			ExpiresHeader:   time.Now().Add(10 * time.Minute).Format(time.RFC1123),
+			ExpectedMinutes: 10,
+		},
+		"Invalid Expires header": {
+			ExpiresHeader:   "invalid-date",
+			ExpectedMinutes: 0,
+		},
+	}
+	for name, tc := range testCases {
+		t.Run(name, func(tt *testing.T) {
+			header := http.Header{}
+			header.Add("Expires", tc.ExpiresHeader)
+			rh := ResponseHandler{
+				httpResponse: &http.Response{
+					Header: header,
+				},
+			}
+			if tc.ExpectedMinutes != rh.ExpiresInMinutes() {
+				t.Errorf("Expected %d, got %d for scenario %q", tc.ExpectedMinutes, rh.ExpiresInMinutes(), name)
+			}
+		})
+	}
+}
+
+func TestCacheControlMaxAgeInMinutes(t *testing.T) {
+	var testCases = map[string]struct {
+		CacheControlHeader string
+		ExpectedMinutes    int
+	}{
+		"Empty header": {
+			CacheControlHeader: "",
+			ExpectedMinutes:    0,
+		},
+		"Valid max-age": {
+			CacheControlHeader: "max-age=600",
+			ExpectedMinutes:    10,
+		},
+		"Invalid max-age": {
+			CacheControlHeader: "max-age=invalid",
+			ExpectedMinutes:    0,
+		},
+		"Multiple directives": {
+			CacheControlHeader: "no-cache, max-age=300",
+			ExpectedMinutes:    5,
+		},
+	}
+	for name, tc := range testCases {
+		t.Run(name, func(tt *testing.T) {
+			header := http.Header{}
+			header.Add("Cache-Control", tc.CacheControlHeader)
+			rh := ResponseHandler{
+				httpResponse: &http.Response{
+					Header: header,
+				},
+			}
+			if tc.ExpectedMinutes != rh.CacheControlMaxAgeInMinutes() {
+				t.Errorf("Expected %d, got %d for scenario %q", tc.ExpectedMinutes, rh.CacheControlMaxAgeInMinutes(), name)
+			}
+		})
+	}
+}

+ 10 - 2
internal/reader/handler/handler.go

@@ -301,8 +301,12 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
 			return localizedError
 		}
 
-		// If the feed has a TTL defined, we use it to make sure we don't check it too often.
-		refreshDelayInMinutes = updatedFeed.TTL
+		// Use the largest of the RSS TTL value and the delays given by the Cache-Control and Expires HTTP headers, if available.
+		// Otherwise, we use the default value from the configuration (min interval parameter).
+		feedTTLValue := updatedFeed.TTL
+		cacheControlMaxAgeValue := responseHandler.CacheControlMaxAgeInMinutes()
+		expiresValue := responseHandler.ExpiresInMinutes()
+		refreshDelayInMinutes = max(feedTTLValue, cacheControlMaxAgeValue, expiresValue)
 
 		// Schedule the next check using the updated arguments.
 		originalFeed.ScheduleNextCheck(weeklyEntryCount, refreshDelayInMinutes)
@@ -310,6 +314,10 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
 		slog.Debug("Updated next check date",
 			slog.Int64("user_id", userID),
 			slog.Int64("feed_id", feedID),
+			slog.String("feed_url", originalFeed.FeedURL),
+			slog.Int("feed_ttl_minutes", feedTTLValue),
+			slog.Int("cache_control_max_age_in_minutes", cacheControlMaxAgeValue),
+			slog.Int("expires_in_minutes", expiresValue),
 			slog.Int("refresh_delay_in_minutes", refreshDelayInMinutes),
 			slog.Time("new_next_check_at", originalFeed.NextCheckAt),
 		)