urlcleaner_test.go 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package urlcleaner // import "miniflux.app/v2/internal/reader/urlcleaner"
  4. import (
  5. "net/url"
  6. "reflect"
  7. "testing"
  8. )
  9. func TestRemoveTrackingParams(t *testing.T) {
  10. tests := []struct {
  11. name string
  12. input string
  13. expected string
  14. strictComparison bool
  15. }{
  16. {
  17. name: "URL with tracking parameters",
  18. input: "https://example.com/page?id=123&utm_source=newsletter&utm_medium=email&fbclid=abc123",
  19. expected: "https://example.com/page?id=123",
  20. },
  21. {
  22. name: "URL with only tracking parameters",
  23. input: "https://example.com/page?utm_source=newsletter&utm_medium=email",
  24. expected: "https://example.com/page",
  25. },
  26. {
  27. name: "URL with no tracking parameters",
  28. input: "https://example.com/page?id=123&foo=bar",
  29. expected: "https://example.com/page?id=123&foo=bar",
  30. },
  31. {
  32. name: "URL with no parameters",
  33. input: "https://example.com/page",
  34. expected: "https://example.com/page",
  35. strictComparison: true,
  36. },
  37. {
  38. name: "URL with mixed case tracking parameters",
  39. input: "https://example.com/page?UTM_SOURCE=newsletter&utm_MEDIUM=email",
  40. expected: "https://example.com/page",
  41. },
  42. {
  43. name: "URL with tracking parameters and fragments",
  44. input: "https://example.com/page?id=123&utm_source=newsletter#section1",
  45. expected: "https://example.com/page?id=123#section1",
  46. },
  47. {
  48. name: "URL with only tracking parameters and fragments",
  49. input: "https://example.com/page?utm_source=newsletter#section1",
  50. expected: "https://example.com/page#section1",
  51. },
  52. {
  53. name: "URL with only one tracking parameter",
  54. input: "https://example.com/page?utm_source=newsletter",
  55. expected: "https://example.com/page",
  56. },
  57. {
  58. name: "URL with encoded characters",
  59. input: "https://example.com/page?name=John%20Doe&utm_source=newsletter",
  60. expected: "https://example.com/page?name=John+Doe",
  61. },
  62. {
  63. name: "Non-standard URL parameter with no tracker",
  64. input: "https://example.com/foo.jpg?crop/1420x708/format/webp",
  65. expected: "https://example.com/foo.jpg?crop/1420x708/format/webp",
  66. strictComparison: true,
  67. },
  68. {
  69. name: "Invalid URL",
  70. input: "https://example|org/",
  71. expected: "",
  72. },
  73. {
  74. name: "Non-HTTP URL",
  75. input: "mailto:user@example.org",
  76. expected: "mailto:user@example.org",
  77. strictComparison: true,
  78. },
  79. }
  80. for _, tt := range tests {
  81. t.Run(tt.name, func(t *testing.T) {
  82. result, err := RemoveTrackingParameters(tt.input)
  83. if tt.expected == "" {
  84. if err == nil {
  85. t.Errorf("Expected an error for invalid URL, but got none")
  86. }
  87. } else {
  88. if err != nil {
  89. t.Errorf("Unexpected error: %v", err)
  90. }
  91. if tt.strictComparison && result != tt.expected {
  92. t.Errorf("removeTrackingParams(%q) = %q, want %q", tt.input, result, tt.expected)
  93. }
  94. if !urlsEqual(result, tt.expected) {
  95. t.Errorf("removeTrackingParams(%q) = %q, want %q", tt.input, result, tt.expected)
  96. }
  97. }
  98. })
  99. }
  100. }
  101. // urlsEqual compares two URLs for equality, ignoring the order of query parameters
  102. func urlsEqual(url1, url2 string) bool {
  103. u1, err1 := url.Parse(url1)
  104. u2, err2 := url.Parse(url2)
  105. if err1 != nil || err2 != nil {
  106. return false
  107. }
  108. if u1.Scheme != u2.Scheme || u1.Host != u2.Host || u1.Path != u2.Path || u1.Fragment != u2.Fragment {
  109. return false
  110. }
  111. return reflect.DeepEqual(u1.Query(), u2.Query())
  112. }