url.go 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package urllib // import "miniflux.app/v2/internal/urllib"
  4. import (
  5. "errors"
  6. "fmt"
  7. "net"
  8. "net/url"
  9. "slices"
  10. "strings"
  11. )
  12. // IsRelativePath reports whether the link is a relative path (no scheme, host, or scheme-relative // form).
  13. func IsRelativePath(link string) bool {
  14. if link == "" {
  15. return false
  16. }
  17. if parsedURL, err := url.Parse(link); err == nil {
  18. // Only allow relative paths (not scheme-relative URLs like //example.org)
  19. // and ensure the URL doesn't have a host component
  20. if !parsedURL.IsAbs() && parsedURL.Host == "" && parsedURL.Scheme == "" {
  21. return true
  22. }
  23. }
  24. return false
  25. }
  26. // hasHTTPPrefix reports whether the URL string begins with an HTTP or HTTPS scheme.
  27. func hasHTTPPrefix(inputURL string) bool {
  28. return strings.HasPrefix(inputURL, "https://") || strings.HasPrefix(inputURL, "http://")
  29. }
  30. // IsAbsoluteURL reports whether the link is absolute.
  31. func IsAbsoluteURL(inputURL string) bool {
  32. if hasHTTPPrefix(inputURL) {
  33. return true
  34. }
  35. parsedURL, err := url.Parse(inputURL)
  36. if err != nil {
  37. return false
  38. }
  39. return parsedURL.IsAbs()
  40. }
  41. // resolveToAbsoluteURL resolves a relative URL using a base URL, parsing the base only if needed.
  42. func resolveToAbsoluteURL(parsedBaseURL *url.URL, baseURL, relativeURL string) (string, error) {
  43. // Avoid parsing the relative URL if it's already absolute
  44. if strings.HasPrefix(relativeURL, "//") {
  45. return "https:" + relativeURL, nil
  46. }
  47. if hasHTTPPrefix(relativeURL) {
  48. return relativeURL, nil
  49. }
  50. // Parse the relative URL and check if it's already absolute
  51. parsedRelativeURL, err := url.Parse(relativeURL)
  52. if err != nil {
  53. return "", fmt.Errorf("unable to parse relative URL: %w", err)
  54. }
  55. if parsedRelativeURL.IsAbs() {
  56. return relativeURL, nil
  57. }
  58. // Parse the base URL if not already parsed
  59. if parsedBaseURL == nil {
  60. parsedBaseURL, err = url.Parse(baseURL)
  61. if err != nil {
  62. return "", fmt.Errorf("unable to parse base URL: %w", err)
  63. }
  64. }
  65. return parsedBaseURL.ResolveReference(parsedRelativeURL).String(), nil
  66. }
  67. // ResolveToAbsoluteURL resolves a relative URL against a base URL and returns the absolute URL.
  68. func ResolveToAbsoluteURL(baseURL, relativeURL string) (string, error) {
  69. return resolveToAbsoluteURL(nil, baseURL, relativeURL)
  70. }
  71. // ResolveToAbsoluteURLWithParsedBaseURL resolves a relative URL using a pre-parsed base URL and returns the absolute URL.
  72. func ResolveToAbsoluteURLWithParsedBaseURL(parsedBaseURL *url.URL, relativeURL string) (string, error) {
  73. return resolveToAbsoluteURL(parsedBaseURL, "", relativeURL)
  74. }
  75. // RootURL returns the scheme and host of the given URL with a trailing slash.
  76. func RootURL(websiteURL string) string {
  77. if websiteURL == "" {
  78. return ""
  79. }
  80. if strings.HasPrefix(websiteURL, "//") {
  81. websiteURL = "https://" + websiteURL[2:]
  82. }
  83. u, err := url.Parse(websiteURL)
  84. if err != nil || u.Scheme == "" || u.Host == "" {
  85. return websiteURL
  86. }
  87. u.Fragment = ""
  88. u.RawQuery = ""
  89. u.Path = "/"
  90. u.RawPath = ""
  91. return u.Scheme + "://" + u.Host + "/"
  92. }
  93. // IsHTTPS reports whether the URL uses HTTPS.
  94. func IsHTTPS(websiteURL string) bool {
  95. parsedURL, err := url.Parse(websiteURL)
  96. if err != nil {
  97. return false
  98. }
  99. return strings.EqualFold(parsedURL.Scheme, "https")
  100. }
  101. // Domain returns the host component of the given URL.
  102. func Domain(websiteURL string) string {
  103. parsedURL, err := url.Parse(websiteURL)
  104. if err != nil {
  105. return websiteURL
  106. }
  107. return parsedURL.Host
  108. }
  109. // DomainWithoutWWW returns the host component without a leading "www." prefix when present.
  110. func DomainWithoutWWW(websiteURL string) string {
  111. return strings.TrimPrefix(Domain(websiteURL), "www.")
  112. }
  113. // JoinBaseURLAndPath joins a base URL and a path segment into a single URL string.
  114. func JoinBaseURLAndPath(baseURL, path string) (string, error) {
  115. if baseURL == "" {
  116. return "", errors.New("empty base URL")
  117. }
  118. if path == "" {
  119. return "", errors.New("empty path")
  120. }
  121. _, err := url.Parse(baseURL)
  122. if err != nil {
  123. return "", fmt.Errorf("invalid base URL: %w", err)
  124. }
  125. finalURL, err := url.JoinPath(baseURL, path)
  126. if err != nil {
  127. return "", fmt.Errorf("unable to join base URL %s and path %s: %w", baseURL, path, err)
  128. }
  129. return finalURL, nil
  130. }
  131. // ResolvesToPrivateIP resolves a hostname and reports whether any resolved IP address is non-public.
  132. func ResolvesToPrivateIP(host string) (bool, error) {
  133. ips, err := net.LookupIP(host)
  134. if err != nil {
  135. return false, err
  136. }
  137. if slices.ContainsFunc(ips, isNonPublicIP) {
  138. return true, nil
  139. }
  140. return false, nil
  141. }
  142. // isNonPublicIP returns true if the given IP is private, loopback,
  143. // link-local, multicast, or unspecified.
  144. func isNonPublicIP(ip net.IP) bool {
  145. if ip == nil {
  146. return true
  147. }
  148. return ip.IsPrivate() ||
  149. ip.IsLoopback() ||
  150. ip.IsLinkLocalUnicast() ||
  151. ip.IsLinkLocalMulticast() ||
  152. ip.IsMulticast() ||
  153. ip.IsUnspecified()
  154. }