urlcleaner_test.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package urlcleaner // import "miniflux.app/v2/internal/reader/urlcleaner"
  4. import (
  5. "net/url"
  6. "reflect"
  7. "testing"
  8. )
  9. func TestRemoveTrackingParams(t *testing.T) {
  10. tests := []struct {
  11. name string
  12. input string
  13. expected string
  14. }{
  15. {
  16. name: "URL with tracking parameters",
  17. input: "https://example.com/page?id=123&utm_source=newsletter&utm_medium=email&fbclid=abc123",
  18. expected: "https://example.com/page?id=123",
  19. },
  20. {
  21. name: "URL with only tracking parameters",
  22. input: "https://example.com/page?utm_source=newsletter&utm_medium=email",
  23. expected: "https://example.com/page",
  24. },
  25. {
  26. name: "URL with no tracking parameters",
  27. input: "https://example.com/page?id=123&foo=bar",
  28. expected: "https://example.com/page?id=123&foo=bar",
  29. },
  30. {
  31. name: "URL with no parameters",
  32. input: "https://example.com/page",
  33. expected: "https://example.com/page",
  34. },
  35. {
  36. name: "URL with mixed case tracking parameters",
  37. input: "https://example.com/page?UTM_SOURCE=newsletter&utm_MEDIUM=email",
  38. expected: "https://example.com/page",
  39. },
  40. {
  41. name: "URL with tracking parameters and fragments",
  42. input: "https://example.com/page?id=123&utm_source=newsletter#section1",
  43. expected: "https://example.com/page?id=123#section1",
  44. },
  45. {
  46. name: "URL with only tracking parameters and fragments",
  47. input: "https://example.com/page?utm_source=newsletter#section1",
  48. expected: "https://example.com/page#section1",
  49. },
  50. {
  51. name: "URL with only one tracking parameter",
  52. input: "https://example.com/page?utm_source=newsletter",
  53. expected: "https://example.com/page",
  54. },
  55. {
  56. name: "URL with encoded characters",
  57. input: "https://example.com/page?name=John%20Doe&utm_source=newsletter",
  58. expected: "https://example.com/page?name=John+Doe",
  59. },
  60. {
  61. name: "Invalid URL",
  62. input: "https://example|org/",
  63. expected: "",
  64. },
  65. {
  66. name: "Non-HTTP URL",
  67. input: "mailto:user@example.org",
  68. expected: "mailto:user@example.org",
  69. },
  70. }
  71. for _, tt := range tests {
  72. t.Run(tt.name, func(t *testing.T) {
  73. result, err := RemoveTrackingParameters(tt.input)
  74. if tt.expected == "" {
  75. if err == nil {
  76. t.Errorf("Expected an error for invalid URL, but got none")
  77. }
  78. } else {
  79. if err != nil {
  80. t.Errorf("Unexpected error: %v", err)
  81. }
  82. if !urlsEqual(result, tt.expected) {
  83. t.Errorf("removeTrackingParams(%q) = %q, want %q", tt.input, result, tt.expected)
  84. }
  85. }
  86. })
  87. }
  88. }
  89. // urlsEqual compares two URLs for equality, ignoring the order of query parameters
  90. func urlsEqual(url1, url2 string) bool {
  91. u1, err1 := url.Parse(url1)
  92. u2, err2 := url.Parse(url2)
  93. if err1 != nil || err2 != nil {
  94. return false
  95. }
  96. if u1.Scheme != u2.Scheme || u1.Host != u2.Host || u1.Path != u2.Path || u1.Fragment != u2.Fragment {
  97. return false
  98. }
  99. return reflect.DeepEqual(u1.Query(), u2.Query())
  100. }