Bläddra i källkod

feat: remove the `ref` parameter from url

This is used by (at least) Ghost (https://forum.ghost.org/t/ref-parameter-being-added-to-links/38335)

Examples:
- https://blog.exploits.club/exploits-club-weekly-newsletter-66-mitigations-galore-dirtycow-revisited-program-analysis-for-uafs-and-more/
- https://labs.watchtowr.com/is-the-sofistication-in-the-room-with-us-x-forwarded-for-and-ivanti-connect-secure-cve-2025-22457/
jvoisin 1 år sedan
förälder
incheckning
ff2dfe977b

+ 1 - 1
internal/reader/processor/processor.go

@@ -52,7 +52,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
 			continue
 		}
 
-		if cleanedURL, err := urlcleaner.RemoveTrackingParameters(entry.URL); err == nil {
+		if cleanedURL, err := urlcleaner.RemoveTrackingParameters(feed.FeedURL, feed.SiteURL, entry.URL); err == nil {
 			entry.URL = cleanedURL
 		}
 

+ 2 - 1
internal/reader/sanitizer/sanitizer.go

@@ -217,7 +217,8 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
 					continue
 				}
 
-				if cleanedURL, err := urlcleaner.RemoveTrackingParameters(value); err == nil {
+				// TODO: pass the feed URL as the second argument instead of passing baseURL twice.
+				if cleanedURL, err := urlcleaner.RemoveTrackingParameters(baseURL, baseURL, value); err == nil {
 					value = cleanedURL
 				}
 			}

+ 26 - 1
internal/reader/urlcleaner/urlcleaner.go

@@ -89,7 +89,13 @@ var trackingParams = map[string]bool{
 	"_branch_referrer": true,
 }
 
-func RemoveTrackingParameters(inputURL string) (string, error) {
+// Outbound tracking parameters carry the hostname of the originating website, which that site appends to its outbound links.
+var trackingParamsOutbound = map[string]bool{
+	// Ghost
+	"ref": true,
+}
+
+func RemoveTrackingParameters(baseUrl, feedUrl, inputURL string) (string, error) {
 	parsedURL, err := url.Parse(inputURL)
 	if err != nil {
 		return "", fmt.Errorf("urlcleaner: error parsing URL: %v", err)
@@ -99,6 +105,15 @@ func RemoveTrackingParameters(inputURL string) (string, error) {
 		return inputURL, nil
 	}
 
+	parsedBaseUrl, err := url.Parse(baseUrl)
+	if err != nil {
+		return "", fmt.Errorf("urlcleaner: error parsing base URL: %v", err)
+	}
+	parsedFeedUrl, err := url.Parse(feedUrl)
+	if err != nil {
+		return "", fmt.Errorf("urlcleaner: error parsing feed URL: %v", err)
+	}
+
 	queryParams := parsedURL.Query()
 	hasTrackers := false
 
@@ -109,6 +124,16 @@ func RemoveTrackingParameters(inputURL string) (string, error) {
 			queryParams.Del(param)
 			hasTrackers = true
 		}
+		if trackingParamsOutbound[lowerParam] {
+			// handle duplicate parameters like ?a=b&a=c&a=d…
+			for _, value := range queryParams[param] {
+				if value == parsedBaseUrl.Hostname() || value == parsedFeedUrl.Hostname() {
+					queryParams.Del(param)
+					hasTrackers = true
+					break
+				}
+			}
+		}
 	}
 
 	// Do not modify the URL if there are no tracking parameters

+ 39 - 1
internal/reader/urlcleaner/urlcleaner_test.go

@@ -14,6 +14,8 @@ func TestRemoveTrackingParams(t *testing.T) {
 		name             string
 		input            string
 		expected         string
+		baseUrl          string
+		feedUrl          string
 		strictComparison bool
 	}{
 		{
@@ -62,28 +64,64 @@ func TestRemoveTrackingParams(t *testing.T) {
 			input:    "https://example.com/page?name=John%20Doe&utm_source=newsletter",
 			expected: "https://example.com/page?name=John+Doe",
 		},
+		{
+			name:     "ref parameter for another url",
+			input:    "https://example.com/page?ref=test.com",
+			baseUrl:  "https://example.com/page",
+			expected: "https://example.com/page?ref=test.com",
+		},
+		{
+			name:     "ref parameter for feed url",
+			input:    "https://example.com/page?ref=feed.com",
+			baseUrl:  "https://example.com/page",
+			expected: "https://example.com/page",
+			feedUrl:  "http://feed.com",
+		},
+		{
+			name:     "ref parameter for site url",
+			input:    "https://example.com/page?ref=example.com",
+			baseUrl:  "https://example.com/page",
+			expected: "https://example.com/page",
+		},
+		{
+			name:     "ref parameter for base url",
+			input:    "https://example.com/page?ref=example.com",
+			expected: "https://example.com/page",
+			baseUrl:  "https://example.com",
+			feedUrl:  "https://feedburned.com/example",
+		},
+		{
+			name:     "ref parameter for base url on subdomain",
+			input:    "https://blog.exploits.club/some-path?ref=blog.exploits.club",
+			expected: "https://blog.exploits.club/some-path",
+			baseUrl:  "https://blog.exploits.club/some-path",
+			feedUrl:  "https://feedburned.com/exploit.club",
+		},
 		{
 			name:             "Non-standard URL parameter with no tracker",
 			input:            "https://example.com/foo.jpg?crop/1420x708/format/webp",
 			expected:         "https://example.com/foo.jpg?crop/1420x708/format/webp",
+			baseUrl:          "https://example.com/page",
 			strictComparison: true,
 		},
 		{
 			name:     "Invalid URL",
 			input:    "https://example|org/",
+			baseUrl:  "https://example.com/page",
 			expected: "",
 		},
 		{
 			name:             "Non-HTTP URL",
 			input:            "mailto:user@example.org",
 			expected:         "mailto:user@example.org",
+			baseUrl:          "https://example.com/page",
 			strictComparison: true,
 		},
 	}
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result, err := RemoveTrackingParameters(tt.input)
+			result, err := RemoveTrackingParameters(tt.baseUrl, tt.feedUrl, tt.input)
 			if tt.expected == "" {
 				if err == nil {
 					t.Errorf("Expected an error for invalid URL, but got none")