urlcleaner_test.go 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package urlcleaner // import "miniflux.app/v2/internal/reader/urlcleaner"
  4. import (
  5. "net/url"
  6. "reflect"
  7. "testing"
  8. )
  9. func TestRemoveTrackingParams(t *testing.T) {
  10. tests := []struct {
  11. name string
  12. input string
  13. expected string
  14. baseUrl string
  15. feedUrl string
  16. strictComparison bool
  17. }{
  18. {
  19. name: "URL with tracking parameters",
  20. input: "https://example.com/page?id=123&utm_source=newsletter&utm_medium=email&fbclid=abc123",
  21. expected: "https://example.com/page?id=123",
  22. },
  23. {
  24. name: "URL with only tracking parameters",
  25. input: "https://example.com/page?utm_source=newsletter&utm_medium=email",
  26. expected: "https://example.com/page",
  27. },
  28. {
  29. name: "URL with no tracking parameters",
  30. input: "https://example.com/page?id=123&foo=bar",
  31. expected: "https://example.com/page?id=123&foo=bar",
  32. },
  33. {
  34. name: "URL with no parameters",
  35. input: "https://example.com/page",
  36. expected: "https://example.com/page",
  37. strictComparison: true,
  38. },
  39. {
  40. name: "URL with mixed case tracking parameters",
  41. input: "https://example.com/page?UTM_SOURCE=newsletter&utm_MEDIUM=email",
  42. expected: "https://example.com/page",
  43. },
  44. {
  45. name: "URL with tracking parameters and fragments",
  46. input: "https://example.com/page?id=123&utm_source=newsletter#section1",
  47. expected: "https://example.com/page?id=123#section1",
  48. },
  49. {
  50. name: "URL with only tracking parameters and fragments",
  51. input: "https://example.com/page?utm_source=newsletter#section1",
  52. expected: "https://example.com/page#section1",
  53. },
  54. {
  55. name: "URL with only one tracking parameter",
  56. input: "https://example.com/page?utm_source=newsletter",
  57. expected: "https://example.com/page",
  58. },
  59. {
  60. name: "URL with encoded characters",
  61. input: "https://example.com/page?name=John%20Doe&utm_source=newsletter",
  62. expected: "https://example.com/page?name=John+Doe",
  63. },
  64. {
  65. name: "ref parameter for another url",
  66. input: "https://example.com/page?ref=test.com",
  67. baseUrl: "https://example.com/page",
  68. expected: "https://example.com/page?ref=test.com",
  69. },
  70. {
  71. name: "ref parameter for feed url",
  72. input: "https://example.com/page?ref=feed.com",
  73. baseUrl: "https://example.com/page",
  74. expected: "https://example.com/page",
  75. feedUrl: "http://feed.com",
  76. },
  77. {
  78. name: "ref parameter for site url",
  79. input: "https://example.com/page?ref=example.com",
  80. baseUrl: "https://example.com/page",
  81. expected: "https://example.com/page",
  82. },
  83. {
  84. name: "ref parameter for base url",
  85. input: "https://example.com/page?ref=example.com",
  86. expected: "https://example.com/page",
  87. baseUrl: "https://example.com",
  88. feedUrl: "https://feedburned.com/example",
  89. },
  90. {
  91. name: "ref parameter for base url on subdomain",
  92. input: "https://blog.exploits.club/some-path?ref=blog.exploits.club",
  93. expected: "https://blog.exploits.club/some-path",
  94. baseUrl: "https://blog.exploits.club/some-path",
  95. feedUrl: "https://feedburned.com/exploit.club",
  96. },
  97. {
  98. name: "Non-standard URL parameter with no tracker",
  99. input: "https://example.com/foo.jpg?crop/1420x708/format/webp",
  100. expected: "https://example.com/foo.jpg?crop/1420x708/format/webp",
  101. baseUrl: "https://example.com/page",
  102. strictComparison: true,
  103. },
  104. {
  105. name: "Invalid URL",
  106. input: "https://example|org/",
  107. baseUrl: "https://example.com/page",
  108. expected: "",
  109. },
  110. {
  111. name: "Non-HTTP URL",
  112. input: "mailto:user@example.org",
  113. expected: "mailto:user@example.org",
  114. baseUrl: "https://example.com/page",
  115. strictComparison: true,
  116. },
  117. }
  118. for _, tt := range tests {
  119. t.Run(tt.name, func(t *testing.T) {
  120. parsedBaseUrl, _ := url.Parse(tt.baseUrl)
  121. parsedFeedUrl, _ := url.Parse(tt.feedUrl)
  122. parsedInputUrl, _ := url.Parse(tt.input)
  123. result, err := RemoveTrackingParameters(parsedBaseUrl, parsedFeedUrl, parsedInputUrl)
  124. if tt.expected == "" {
  125. if err == nil {
  126. t.Errorf("Expected an error for invalid URL, but got none")
  127. }
  128. } else {
  129. if err != nil {
  130. t.Errorf("Unexpected error: %v", err)
  131. }
  132. if tt.strictComparison && result != tt.expected {
  133. t.Errorf("removeTrackingParams(%q) = %q, want %q", tt.input, result, tt.expected)
  134. }
  135. if !urlsEqual(result, tt.expected) {
  136. t.Errorf("removeTrackingParams(%q) = %q, want %q", tt.input, result, tt.expected)
  137. }
  138. }
  139. })
  140. }
  141. }
  142. // urlsEqual compares two URLs for equality, ignoring the order of query parameters
  143. func urlsEqual(url1, url2 string) bool {
  144. u1, err1 := url.Parse(url1)
  145. u2, err2 := url.Parse(url2)
  146. if err1 != nil || err2 != nil {
  147. return false
  148. }
  149. if u1.Scheme != u2.Scheme || u1.Host != u2.Host || u1.Path != u2.Path || u1.Fragment != u2.Fragment {
  150. return false
  151. }
  152. return reflect.DeepEqual(u1.Query(), u2.Query())
  153. }