urlcleaner.go 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
  3. package urlcleaner // import "miniflux.app/v2/internal/reader/urlcleaner"
  4. import (
  5. "fmt"
  6. "net/url"
  7. "strings"
  8. )
  9. // Interesting lists:
  10. // https://raw.githubusercontent.com/AdguardTeam/AdguardFilters/master/TrackParamFilter/sections/general_url.txt
  11. // https://firefox.settings.services.mozilla.com/v1/buckets/main/collections/query-stripping/records
  12. var trackingParams = map[string]bool{
  13. // https://en.wikipedia.org/wiki/UTM_parameters#Parameters
  14. "utm_source": true,
  15. "utm_medium": true,
  16. "utm_campaign": true,
  17. "utm_term": true,
  18. "utm_content": true,
  19. // Facebook Click Identifiers
  20. "fbclid": true,
  21. "_openstat": true,
  22. // Google Click Identifiers
  23. "gclid": true,
  24. "dclid": true,
  25. "gbraid": true,
  26. "wbraid": true,
  27. // Yandex Click Identifiers
  28. "yclid": true,
  29. "ysclid": true,
  30. // Twitter Click Identifier
  31. "twclid": true,
  32. // Microsoft Click Identifier
  33. "msclkid": true,
  34. // Mailchimp Click Identifiers
  35. "mc_cid": true,
  36. "mc_eid": true,
  37. // Wicked Reports click tracking
  38. "wickedid": true,
  39. // Hubspot Click Identifiers
  40. "hsa_cam": true,
  41. "_hsenc": true,
  42. "__hssc": true,
  43. "__hstc": true,
  44. "__hsfp": true,
  45. "hsctatracking": true,
  46. // Olytics
  47. "rb_clickid": true,
  48. "oly_anon_id": true,
  49. "oly_enc_id": true,
  50. // Vero Click Identifier
  51. "vero_id": true,
  52. // Marketo email tracking
  53. "mkt_tok": true,
  54. }
  55. func RemoveTrackingParameters(inputURL string) (string, error) {
  56. parsedURL, err := url.Parse(inputURL)
  57. if err != nil {
  58. return "", fmt.Errorf("urlcleaner: error parsing URL: %v", err)
  59. }
  60. if !strings.HasPrefix(parsedURL.Scheme, "http") {
  61. return inputURL, nil
  62. }
  63. queryParams := parsedURL.Query()
  64. hasTrackers := false
  65. // Remove tracking parameters
  66. for param := range queryParams {
  67. if trackingParams[strings.ToLower(param)] {
  68. queryParams.Del(param)
  69. hasTrackers = true
  70. }
  71. }
  72. // Do not modify the URL if there are no tracking parameters
  73. if !hasTrackers {
  74. return inputURL, nil
  75. }
  76. parsedURL.RawQuery = queryParams.Encode()
  77. // Remove trailing "?" if query string is empty
  78. cleanedURL := parsedURL.String()
  79. cleanedURL = strings.TrimSuffix(cleanedURL, "?")
  80. return cleanedURL, nil
  81. }