url.go 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package urllib // import "miniflux.app/v2/internal/urllib"
  4. import (
  5. "errors"
  6. "fmt"
  7. "net"
  8. "net/netip"
  9. "net/url"
  10. "strings"
  11. )
// rfc6598SharedAddressSpacePrefix is the 100.64.0.0/10 prefix (the RFC 6598
// shared address space, per its name). It is parsed once at package
// initialization and consulted by IsNonPublicIP.
var rfc6598SharedAddressSpacePrefix = netip.MustParsePrefix("100.64.0.0/10")
  13. // IsRelativePath reports whether the link is a relative path (no scheme, host, or scheme-relative // form).
  14. func IsRelativePath(link string) bool {
  15. if link == "" {
  16. return false
  17. }
  18. // Reject backslashes: Go's url.Parse treats them as ordinary path
  19. // characters, but browsers normalize them to forward slashes, so a target
  20. // like "/\evil.com" would parse as relative here yet redirect to
  21. // //evil.com in the browser (open redirect).
  22. if strings.Contains(link, "\\") {
  23. return false
  24. }
  25. if parsedURL, err := url.Parse(link); err == nil {
  26. // Only allow relative paths (not scheme-relative URLs like //example.org)
  27. // and ensure the URL doesn't have a host component
  28. if !parsedURL.IsAbs() && parsedURL.Host == "" && parsedURL.Scheme == "" {
  29. return true
  30. }
  31. }
  32. return false
  33. }
  34. // hasHTTPPrefix reports whether the URL string begins with an HTTP or HTTPS scheme.
  35. func hasHTTPPrefix(inputURL string) bool {
  36. return strings.HasPrefix(inputURL, "https://") || strings.HasPrefix(inputURL, "http://")
  37. }
  38. // hasSOCKSPrefix reports whether the URL string begins with an SOCKS5 or SOCKS5H scheme.
  39. func hasSOCKSPrefix(inputURL string) bool {
  40. return strings.HasPrefix(inputURL, "socks5://") || strings.HasPrefix(inputURL, "socks5h://")
  41. }
  42. // IsAbsoluteURL reports whether the link is absolute and starts with an HTTP or HTTPS scheme.
  43. func IsAbsoluteURL(inputURL string) bool {
  44. if !hasHTTPPrefix(inputURL) {
  45. return false
  46. }
  47. parsedURL, err := url.Parse(inputURL)
  48. if err != nil {
  49. return false
  50. }
  51. return parsedURL.IsAbs()
  52. }
  53. // IsValidProxyURL reports whether the url is absolute, has a host and starts with an HTTP, HTTPS, SOCKS5 or SOCKS5H scheme.
  54. func IsValidProxyURL(inputURL string) bool {
  55. if !hasHTTPPrefix(inputURL) && !hasSOCKSPrefix(inputURL) {
  56. return false
  57. }
  58. parsedURL, err := url.Parse(inputURL)
  59. if err != nil {
  60. return false
  61. }
  62. return parsedURL.IsAbs() && parsedURL.Host != ""
  63. }
  64. // resolveToAbsoluteURL resolves a relative URL using a base URL, parsing the base only if needed.
  65. func resolveToAbsoluteURL(parsedBaseURL *url.URL, baseURL, relativeURL string) (string, error) {
  66. // Avoid parsing the relative URL if it's already absolute
  67. if strings.HasPrefix(relativeURL, "//") {
  68. return "https:" + relativeURL, nil
  69. }
  70. if hasHTTPPrefix(relativeURL) {
  71. return relativeURL, nil
  72. }
  73. // Parse the relative URL and check if it's already absolute
  74. parsedRelativeURL, err := url.Parse(relativeURL)
  75. if err != nil {
  76. return "", fmt.Errorf("unable to parse relative URL: %w", err)
  77. }
  78. if parsedRelativeURL.IsAbs() {
  79. return relativeURL, nil
  80. }
  81. // Parse the base URL if not already parsed
  82. if parsedBaseURL == nil {
  83. parsedBaseURL, err = url.Parse(baseURL)
  84. if err != nil {
  85. return "", fmt.Errorf("unable to parse base URL: %w", err)
  86. }
  87. }
  88. return parsedBaseURL.ResolveReference(parsedRelativeURL).String(), nil
  89. }
  90. // ResolveToAbsoluteURL resolves a relative URL against a base URL and returns the absolute URL.
  91. func ResolveToAbsoluteURL(baseURL, relativeURL string) (string, error) {
  92. return resolveToAbsoluteURL(nil, baseURL, relativeURL)
  93. }
  94. // ResolveToAbsoluteURLWithParsedBaseURL resolves a relative URL using a pre-parsed base URL and returns the absolute URL.
  95. func ResolveToAbsoluteURLWithParsedBaseURL(parsedBaseURL *url.URL, relativeURL string) (string, error) {
  96. return resolveToAbsoluteURL(parsedBaseURL, "", relativeURL)
  97. }
  98. // RootURL returns the scheme and host of the given URL with a trailing slash.
  99. func RootURL(websiteURL string) string {
  100. if websiteURL == "" {
  101. return ""
  102. }
  103. if strings.HasPrefix(websiteURL, "//") {
  104. websiteURL = "https://" + websiteURL[2:]
  105. }
  106. u, err := url.Parse(websiteURL)
  107. if err != nil || u.Scheme == "" || u.Host == "" {
  108. return websiteURL
  109. }
  110. u.Fragment = ""
  111. u.RawQuery = ""
  112. u.Path = "/"
  113. u.RawPath = ""
  114. return u.Scheme + "://" + u.Host + "/"
  115. }
  116. // IsHTTPS reports whether the URL uses HTTPS.
  117. func IsHTTPS(websiteURL string) bool {
  118. parsedURL, err := url.Parse(websiteURL)
  119. if err != nil {
  120. return false
  121. }
  122. return strings.EqualFold(parsedURL.Scheme, "https")
  123. }
  124. // Domain returns the host component of the given URL.
  125. func Domain(websiteURL string) string {
  126. parsedURL, err := url.Parse(websiteURL)
  127. if err != nil {
  128. return websiteURL
  129. }
  130. return parsedURL.Host
  131. }
  132. // DomainWithoutWWW returns the host component without a leading "www." prefix when present.
  133. func DomainWithoutWWW(websiteURL string) string {
  134. return strings.TrimPrefix(Domain(websiteURL), "www.")
  135. }
  136. // JoinBaseURLAndPath joins a base URL and a path segment into a single URL string.
  137. func JoinBaseURLAndPath(baseURL, path string) (string, error) {
  138. if baseURL == "" {
  139. return "", errors.New("empty base URL")
  140. }
  141. if path == "" {
  142. return "", errors.New("empty path")
  143. }
  144. finalURL, err := url.JoinPath(baseURL, path)
  145. if err != nil {
  146. return "", fmt.Errorf("unable to join base URL %s and path %s: %w", baseURL, path, err)
  147. }
  148. return finalURL, nil
  149. }
  150. // IsNonPublicIP returns true if the given IP is private, loopback,
  151. // link-local, multicast, or unspecified.
  152. func IsNonPublicIP(ip net.IP) bool {
  153. if ip == nil {
  154. return true
  155. }
  156. if addr, ok := netip.AddrFromSlice(ip); ok && rfc6598SharedAddressSpacePrefix.Contains(addr.Unmap()) {
  157. return true
  158. }
  159. return ip.IsPrivate() ||
  160. ip.IsLoopback() ||
  161. ip.IsLinkLocalUnicast() ||
  162. ip.IsLinkLocalMulticast() ||
  163. ip.IsMulticast() ||
  164. ip.IsUnspecified()
  165. }