// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
  3. package urllib // import "miniflux.app/v2/internal/urllib"
  4. import (
  5. "errors"
  6. "fmt"
  7. "net"
  8. "net/netip"
  9. "net/url"
  10. "strings"
  11. )
  12. var rfc6598SharedAddressSpacePrefix = netip.MustParsePrefix("100.64.0.0/10")
  13. // IsRelativePath reports whether the link is a relative path (no scheme, host, or scheme-relative // form).
  14. func IsRelativePath(link string) bool {
  15. if link == "" {
  16. return false
  17. }
  18. // Reject backslashes: Go's url.Parse treats them as ordinary path
  19. // characters, but browsers normalize them to forward slashes, so a target
  20. // like "/\evil.com" would parse as relative here yet redirect to
  21. // //evil.com in the browser (open redirect).
  22. if strings.Contains(link, "\\") {
  23. return false
  24. }
  25. if parsedURL, err := url.Parse(link); err == nil {
  26. // Only allow relative paths (not scheme-relative URLs like //example.org)
  27. // and ensure the URL doesn't have a host component
  28. if !parsedURL.IsAbs() && parsedURL.Host == "" && parsedURL.Scheme == "" {
  29. return true
  30. }
  31. }
  32. return false
  33. }
  34. // hasHTTPPrefix reports whether the URL string begins with an HTTP or HTTPS scheme.
  35. func hasHTTPPrefix(inputURL string) bool {
  36. return strings.HasPrefix(inputURL, "https://") || strings.HasPrefix(inputURL, "http://")
  37. }
  38. // IsAbsoluteURL reports whether the link is absolute and starts with an HTTP or HTTPS scheme.
  39. func IsAbsoluteURL(inputURL string) bool {
  40. if !hasHTTPPrefix(inputURL) {
  41. return false
  42. }
  43. parsedURL, err := url.Parse(inputURL)
  44. if err != nil {
  45. return false
  46. }
  47. return parsedURL.IsAbs()
  48. }
  49. // resolveToAbsoluteURL resolves a relative URL using a base URL, parsing the base only if needed.
  50. func resolveToAbsoluteURL(parsedBaseURL *url.URL, baseURL, relativeURL string) (string, error) {
  51. // Avoid parsing the relative URL if it's already absolute
  52. if strings.HasPrefix(relativeURL, "//") {
  53. return "https:" + relativeURL, nil
  54. }
  55. if hasHTTPPrefix(relativeURL) {
  56. return relativeURL, nil
  57. }
  58. // Parse the relative URL and check if it's already absolute
  59. parsedRelativeURL, err := url.Parse(relativeURL)
  60. if err != nil {
  61. return "", fmt.Errorf("unable to parse relative URL: %w", err)
  62. }
  63. if parsedRelativeURL.IsAbs() {
  64. return relativeURL, nil
  65. }
  66. // Parse the base URL if not already parsed
  67. if parsedBaseURL == nil {
  68. parsedBaseURL, err = url.Parse(baseURL)
  69. if err != nil {
  70. return "", fmt.Errorf("unable to parse base URL: %w", err)
  71. }
  72. }
  73. return parsedBaseURL.ResolveReference(parsedRelativeURL).String(), nil
  74. }
  75. // ResolveToAbsoluteURL resolves a relative URL against a base URL and returns the absolute URL.
  76. func ResolveToAbsoluteURL(baseURL, relativeURL string) (string, error) {
  77. return resolveToAbsoluteURL(nil, baseURL, relativeURL)
  78. }
  79. // ResolveToAbsoluteURLWithParsedBaseURL resolves a relative URL using a pre-parsed base URL and returns the absolute URL.
  80. func ResolveToAbsoluteURLWithParsedBaseURL(parsedBaseURL *url.URL, relativeURL string) (string, error) {
  81. return resolveToAbsoluteURL(parsedBaseURL, "", relativeURL)
  82. }
  83. // RootURL returns the scheme and host of the given URL with a trailing slash.
  84. func RootURL(websiteURL string) string {
  85. if websiteURL == "" {
  86. return ""
  87. }
  88. if strings.HasPrefix(websiteURL, "//") {
  89. websiteURL = "https://" + websiteURL[2:]
  90. }
  91. u, err := url.Parse(websiteURL)
  92. if err != nil || u.Scheme == "" || u.Host == "" {
  93. return websiteURL
  94. }
  95. u.Fragment = ""
  96. u.RawQuery = ""
  97. u.Path = "/"
  98. u.RawPath = ""
  99. return u.Scheme + "://" + u.Host + "/"
  100. }
  101. // IsHTTPS reports whether the URL uses HTTPS.
  102. func IsHTTPS(websiteURL string) bool {
  103. parsedURL, err := url.Parse(websiteURL)
  104. if err != nil {
  105. return false
  106. }
  107. return strings.EqualFold(parsedURL.Scheme, "https")
  108. }
  109. // Domain returns the host component of the given URL.
  110. func Domain(websiteURL string) string {
  111. parsedURL, err := url.Parse(websiteURL)
  112. if err != nil {
  113. return websiteURL
  114. }
  115. return parsedURL.Host
  116. }
  117. // DomainWithoutWWW returns the host component without a leading "www." prefix when present.
  118. func DomainWithoutWWW(websiteURL string) string {
  119. return strings.TrimPrefix(Domain(websiteURL), "www.")
  120. }
  121. // JoinBaseURLAndPath joins a base URL and a path segment into a single URL string.
  122. func JoinBaseURLAndPath(baseURL, path string) (string, error) {
  123. if baseURL == "" {
  124. return "", errors.New("empty base URL")
  125. }
  126. if path == "" {
  127. return "", errors.New("empty path")
  128. }
  129. finalURL, err := url.JoinPath(baseURL, path)
  130. if err != nil {
  131. return "", fmt.Errorf("unable to join base URL %s and path %s: %w", baseURL, path, err)
  132. }
  133. return finalURL, nil
  134. }
  135. // IsNonPublicIP returns true if the given IP is private, loopback,
  136. // link-local, multicast, or unspecified.
  137. func IsNonPublicIP(ip net.IP) bool {
  138. if ip == nil {
  139. return true
  140. }
  141. if addr, ok := netip.AddrFromSlice(ip); ok && rfc6598SharedAddressSpacePrefix.Contains(addr.Unmap()) {
  142. return true
  143. }
  144. return ip.IsPrivate() ||
  145. ip.IsLoopback() ||
  146. ip.IsLinkLocalUnicast() ||
  147. ip.IsLinkLocalMulticast() ||
  148. ip.IsMulticast() ||
  149. ip.IsUnspecified()
  150. }