url.go 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package urllib // import "miniflux.app/v2/internal/urllib"
  4. import (
  5. "errors"
  6. "fmt"
  7. "net"
  8. "net/netip"
  9. "net/url"
  10. "strings"
  11. )
  12. var rfc6598SharedAddressSpacePrefix = netip.MustParsePrefix("100.64.0.0/10")
  13. // IsRelativePath reports whether the link is a relative path (no scheme, host, or scheme-relative // form).
  14. func IsRelativePath(link string) bool {
  15. if link == "" {
  16. return false
  17. }
  18. if parsedURL, err := url.Parse(link); err == nil {
  19. // Only allow relative paths (not scheme-relative URLs like //example.org)
  20. // and ensure the URL doesn't have a host component
  21. if !parsedURL.IsAbs() && parsedURL.Host == "" && parsedURL.Scheme == "" {
  22. return true
  23. }
  24. }
  25. return false
  26. }
  27. // hasHTTPPrefix reports whether the URL string begins with an HTTP or HTTPS scheme.
  28. func hasHTTPPrefix(inputURL string) bool {
  29. return strings.HasPrefix(inputURL, "https://") || strings.HasPrefix(inputURL, "http://")
  30. }
  31. // IsAbsoluteURL reports whether the link is absolute and starts with an HTTP or HTTPS scheme.
  32. func IsAbsoluteURL(inputURL string) bool {
  33. if !hasHTTPPrefix(inputURL) {
  34. return false
  35. }
  36. parsedURL, err := url.Parse(inputURL)
  37. if err != nil {
  38. return false
  39. }
  40. return parsedURL.IsAbs()
  41. }
  42. // resolveToAbsoluteURL resolves a relative URL using a base URL, parsing the base only if needed.
  43. func resolveToAbsoluteURL(parsedBaseURL *url.URL, baseURL, relativeURL string) (string, error) {
  44. // Avoid parsing the relative URL if it's already absolute
  45. if strings.HasPrefix(relativeURL, "//") {
  46. return "https:" + relativeURL, nil
  47. }
  48. if hasHTTPPrefix(relativeURL) {
  49. return relativeURL, nil
  50. }
  51. // Parse the relative URL and check if it's already absolute
  52. parsedRelativeURL, err := url.Parse(relativeURL)
  53. if err != nil {
  54. return "", fmt.Errorf("unable to parse relative URL: %w", err)
  55. }
  56. if parsedRelativeURL.IsAbs() {
  57. return relativeURL, nil
  58. }
  59. // Parse the base URL if not already parsed
  60. if parsedBaseURL == nil {
  61. parsedBaseURL, err = url.Parse(baseURL)
  62. if err != nil {
  63. return "", fmt.Errorf("unable to parse base URL: %w", err)
  64. }
  65. }
  66. return parsedBaseURL.ResolveReference(parsedRelativeURL).String(), nil
  67. }
  68. // ResolveToAbsoluteURL resolves a relative URL against a base URL and returns the absolute URL.
  69. func ResolveToAbsoluteURL(baseURL, relativeURL string) (string, error) {
  70. return resolveToAbsoluteURL(nil, baseURL, relativeURL)
  71. }
  72. // ResolveToAbsoluteURLWithParsedBaseURL resolves a relative URL using a pre-parsed base URL and returns the absolute URL.
  73. func ResolveToAbsoluteURLWithParsedBaseURL(parsedBaseURL *url.URL, relativeURL string) (string, error) {
  74. return resolveToAbsoluteURL(parsedBaseURL, "", relativeURL)
  75. }
  76. // RootURL returns the scheme and host of the given URL with a trailing slash.
  77. func RootURL(websiteURL string) string {
  78. if websiteURL == "" {
  79. return ""
  80. }
  81. if strings.HasPrefix(websiteURL, "//") {
  82. websiteURL = "https://" + websiteURL[2:]
  83. }
  84. u, err := url.Parse(websiteURL)
  85. if err != nil || u.Scheme == "" || u.Host == "" {
  86. return websiteURL
  87. }
  88. u.Fragment = ""
  89. u.RawQuery = ""
  90. u.Path = "/"
  91. u.RawPath = ""
  92. return u.Scheme + "://" + u.Host + "/"
  93. }
  94. // IsHTTPS reports whether the URL uses HTTPS.
  95. func IsHTTPS(websiteURL string) bool {
  96. parsedURL, err := url.Parse(websiteURL)
  97. if err != nil {
  98. return false
  99. }
  100. return strings.EqualFold(parsedURL.Scheme, "https")
  101. }
  102. // Domain returns the host component of the given URL.
  103. func Domain(websiteURL string) string {
  104. parsedURL, err := url.Parse(websiteURL)
  105. if err != nil {
  106. return websiteURL
  107. }
  108. return parsedURL.Host
  109. }
  110. // DomainWithoutWWW returns the host component without a leading "www." prefix when present.
  111. func DomainWithoutWWW(websiteURL string) string {
  112. return strings.TrimPrefix(Domain(websiteURL), "www.")
  113. }
  114. // JoinBaseURLAndPath joins a base URL and a path segment into a single URL string.
  115. func JoinBaseURLAndPath(baseURL, path string) (string, error) {
  116. if baseURL == "" {
  117. return "", errors.New("empty base URL")
  118. }
  119. if path == "" {
  120. return "", errors.New("empty path")
  121. }
  122. _, err := url.Parse(baseURL)
  123. if err != nil {
  124. return "", fmt.Errorf("invalid base URL: %w", err)
  125. }
  126. finalURL, err := url.JoinPath(baseURL, path)
  127. if err != nil {
  128. return "", fmt.Errorf("unable to join base URL %s and path %s: %w", baseURL, path, err)
  129. }
  130. return finalURL, nil
  131. }
  132. // IsNonPublicIP returns true if the given IP is private, loopback,
  133. // link-local, multicast, or unspecified.
  134. func IsNonPublicIP(ip net.IP) bool {
  135. if ip == nil {
  136. return true
  137. }
  138. if addr, ok := netip.AddrFromSlice(ip); ok && rfc6598SharedAddressSpacePrefix.Contains(addr.Unmap()) {
  139. return true
  140. }
  141. return ip.IsPrivate() ||
  142. ip.IsLoopback() ||
  143. ip.IsLinkLocalUnicast() ||
  144. ip.IsLinkLocalMulticast() ||
  145. ip.IsMulticast() ||
  146. ip.IsUnspecified()
  147. }