urlcleaner_test.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package urlcleaner // import "miniflux.app/v2/internal/reader/urlcleaner"
  4. import (
  5. "net/url"
  6. "reflect"
  7. "testing"
  8. )
  9. func TestRemoveTrackingParams(t *testing.T) {
  10. tests := []struct {
  11. name string
  12. input string
  13. expected string
  14. baseURL string
  15. feedURL string
  16. strictComparison bool
  17. }{
  18. {
  19. name: "URL with tracking parameters",
  20. input: "https://example.com/page?id=123&utm_source=newsletter&utm_medium=email&fbclid=abc123",
  21. expected: "https://example.com/page?id=123",
  22. },
  23. {
  24. name: "URL with only tracking parameters",
  25. input: "https://example.com/page?utm_source=newsletter&utm_medium=email",
  26. expected: "https://example.com/page",
  27. },
  28. {
  29. name: "URL with no tracking parameters",
  30. input: "https://example.com/page?id=123&foo=bar",
  31. expected: "https://example.com/page?id=123&foo=bar",
  32. },
  33. {
  34. name: "URL with no parameters",
  35. input: "https://example.com/page",
  36. expected: "https://example.com/page",
  37. strictComparison: true,
  38. },
  39. {
  40. name: "URL with mixed case tracking parameters",
  41. input: "https://example.com/page?UTM_SOURCE=newsletter&utm_MEDIUM=email",
  42. expected: "https://example.com/page",
  43. },
  44. {
  45. name: "URL with tracking parameters and fragments",
  46. input: "https://example.com/page?id=123&utm_source=newsletter#section1",
  47. expected: "https://example.com/page?id=123#section1",
  48. },
  49. {
  50. name: "URL with only tracking parameters and fragments",
  51. input: "https://example.com/page?utm_source=newsletter#section1",
  52. expected: "https://example.com/page#section1",
  53. },
  54. {
  55. name: "URL with only one tracking parameter",
  56. input: "https://example.com/page?utm_source=newsletter",
  57. expected: "https://example.com/page",
  58. },
  59. {
  60. name: "URL with encoded characters",
  61. input: "https://example.com/page?name=John%20Doe&utm_source=newsletter",
  62. expected: "https://example.com/page?name=John+Doe",
  63. },
  64. {
  65. name: "ref parameter for another url",
  66. input: "https://example.com/page?ref=test.com",
  67. baseURL: "https://example.com/page",
  68. expected: "https://example.com/page?ref=test.com",
  69. },
  70. {
  71. name: "ref parameter for feed url",
  72. input: "https://example.com/page?ref=feed.com",
  73. baseURL: "https://example.com/page",
  74. expected: "https://example.com/page",
  75. feedURL: "http://feed.com",
  76. },
  77. {
  78. name: "ref parameter for site url",
  79. input: "https://example.com/page?ref=example.com",
  80. baseURL: "https://example.com/page",
  81. expected: "https://example.com/page",
  82. },
  83. {
  84. name: "ref parameter for base url",
  85. input: "https://example.com/page?ref=example.com",
  86. expected: "https://example.com/page",
  87. baseURL: "https://example.com",
  88. feedURL: "https://feedburned.com/example",
  89. },
  90. {
  91. name: "ref parameter for base url on subdomain",
  92. input: "https://blog.exploits.club/some-path?ref=blog.exploits.club",
  93. expected: "https://blog.exploits.club/some-path",
  94. baseURL: "https://blog.exploits.club/some-path",
  95. feedURL: "https://feedburned.com/exploit.club",
  96. },
  97. {
  98. name: "Non-standard URL parameter with no tracker",
  99. input: "https://example.com/foo.jpg?crop/1420x708/format/webp",
  100. expected: "https://example.com/foo.jpg?crop/1420x708/format/webp",
  101. baseURL: "https://example.com/page",
  102. strictComparison: true,
  103. },
  104. {
  105. name: "Invalid URL",
  106. input: "https://example|org/",
  107. baseURL: "https://example.com/page",
  108. expected: "",
  109. },
  110. {
  111. name: "Non-HTTP URL",
  112. input: "mailto:user@example.org",
  113. expected: "mailto:user@example.org",
  114. baseURL: "https://example.com/page",
  115. strictComparison: true,
  116. },
  117. {
  118. name: "Matomo tracking URL",
  119. input: "https://example.com/?mtm_campaign=2020_august_promo&mtm_source=newsletter&mtm_medium=email&mtm_content=primary-cta",
  120. expected: "https://example.com/",
  121. baseURL: "https://example.com",
  122. strictComparison: true,
  123. },
  124. }
  125. for _, tt := range tests {
  126. t.Run(tt.name, func(t *testing.T) {
  127. parsedBaseUrl, _ := url.Parse(tt.baseURL)
  128. parsedFeedUrl, _ := url.Parse(tt.feedURL)
  129. parsedInputUrl, _ := url.Parse(tt.input)
  130. result, err := RemoveTrackingParameters(parsedBaseUrl, parsedFeedUrl, parsedInputUrl)
  131. if tt.expected == "" {
  132. if err == nil {
  133. t.Errorf("Expected an error for invalid URL, but got none")
  134. }
  135. } else {
  136. if err != nil {
  137. t.Errorf("Unexpected error: %v", err)
  138. }
  139. if tt.strictComparison && result != tt.expected {
  140. t.Errorf("removeTrackingParams(%q) = %q, want %q", tt.input, result, tt.expected)
  141. }
  142. if !urlsEqual(result, tt.expected) {
  143. t.Errorf("removeTrackingParams(%q) = %q, want %q", tt.input, result, tt.expected)
  144. }
  145. }
  146. })
  147. }
  148. }
  // urlsEqual reports whether url1 and url2 parse to the same URL, ignoring the
  // order of query parameters.
  //
  // It returns false if either string cannot be parsed. Scheme, opaque part,
  // user info, host, path and fragment are compared as parsed. The query strings
  // are compared as decoded key/value sets, so "a=1&b=2" equals "b=2&a=1", while
  // the relative order of repeated values for a single key still matters.
  func urlsEqual(url1, url2 string) bool {
  	u1, err := url.Parse(url1)
  	if err != nil {
  		return false
  	}
  	u2, err := url.Parse(url2)
  	if err != nil {
  		return false
  	}

  	// Opaque holds the whole payload of non-hierarchical URLs such as
  	// "mailto:user@example.org"; without it any two such URLs sharing a scheme
  	// would compare equal. User.String is safe to call on a nil Userinfo.
  	sameParts := u1.Scheme == u2.Scheme &&
  		u1.Opaque == u2.Opaque &&
  		u1.User.String() == u2.User.String() &&
  		u1.Host == u2.Host &&
  		u1.Path == u2.Path &&
  		u1.Fragment == u2.Fragment
  	if !sameParts {
  		return false
  	}

  	// url.Values is a map, so DeepEqual is insensitive to parameter order.
  	return reflect.DeepEqual(u1.Query(), u2.Query())
  }