url.go 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package urllib // import "miniflux.app/v2/internal/urllib"
  4. import (
  5. "errors"
  6. "fmt"
  7. "net"
  8. "net/url"
  9. "strings"
  10. )
  // IsRelativePath reports whether link is a relative reference: it must be
  // non-empty, parse successfully, and carry neither a scheme nor a host.
  // Scheme-relative links such as //example.org have a host and are rejected.
  func IsRelativePath(link string) bool {
  	if link == "" {
  		return false
  	}

  	u, err := url.Parse(link)
  	if err != nil {
  		return false
  	}

  	// No scheme means the link is not absolute; no host rules out the
  	// scheme-relative form.
  	return u.Scheme == "" && u.Host == ""
  }
  // hasHTTPPrefix reports whether inputURL starts with the literal prefix
  // "https://" or "http://". The comparison is case-sensitive.
  func hasHTTPPrefix(inputURL string) bool {
  	for _, prefix := range [...]string{"https://", "http://"} {
  		if strings.HasPrefix(inputURL, prefix) {
  			return true
  		}
  	}
  	return false
  }
  29. // IsAbsoluteURL reports whether the link is absolute.
  30. func IsAbsoluteURL(inputURL string) bool {
  31. if hasHTTPPrefix(inputURL) {
  32. return true
  33. }
  34. parsedURL, err := url.Parse(inputURL)
  35. if err != nil {
  36. return false
  37. }
  38. return parsedURL.IsAbs()
  39. }
  40. // resolveToAbsoluteURL resolves a relative URL using a base URL, parsing the base only if needed.
  41. func resolveToAbsoluteURL(parsedBaseURL *url.URL, baseURL, relativeURL string) (string, error) {
  42. // Avoid parsing the relative URL if it's already absolute
  43. if strings.HasPrefix(relativeURL, "//") {
  44. return "https:" + relativeURL, nil
  45. }
  46. if hasHTTPPrefix(relativeURL) {
  47. return relativeURL, nil
  48. }
  49. // Parse the relative URL and check if it's already absolute
  50. parsedRelativeURL, err := url.Parse(relativeURL)
  51. if err != nil {
  52. return "", fmt.Errorf("unable to parse relative URL: %w", err)
  53. }
  54. if parsedRelativeURL.IsAbs() {
  55. return relativeURL, nil
  56. }
  57. // Parse the base URL if not already parsed
  58. if parsedBaseURL == nil {
  59. parsedBaseURL, err = url.Parse(baseURL)
  60. if err != nil {
  61. return "", fmt.Errorf("unable to parse base URL: %w", err)
  62. }
  63. }
  64. return parsedBaseURL.ResolveReference(parsedRelativeURL).String(), nil
  65. }
  66. // ResolveToAbsoluteURL resolves a relative URL against a base URL and returns the absolute URL.
  67. func ResolveToAbsoluteURL(baseURL, relativeURL string) (string, error) {
  68. return resolveToAbsoluteURL(nil, baseURL, relativeURL)
  69. }
  70. // ResolveToAbsoluteURLWithParsedBaseURL resolves a relative URL using a pre-parsed base URL and returns the absolute URL.
  71. func ResolveToAbsoluteURLWithParsedBaseURL(parsedBaseURL *url.URL, relativeURL string) (string, error) {
  72. return resolveToAbsoluteURL(parsedBaseURL, "", relativeURL)
  73. }
  // RootURL returns the root of websiteURL in the form "scheme://host/".
  //
  // An empty input yields an empty string. A scheme-relative input
  // ("//host/...") is treated as HTTPS. If the URL cannot be parsed, or has no
  // scheme or no host, the input is returned as-is (after the scheme-relative
  // rewrite, if one was applied). User info, path, query, and fragment are
  // dropped from the result; the port, being part of the host, is kept.
  func RootURL(websiteURL string) string {
  	if websiteURL == "" {
  		return ""
  	}

  	// Scheme-relative URLs are assumed to be reachable over HTTPS.
  	if strings.HasPrefix(websiteURL, "//") {
  		websiteURL = "https://" + websiteURL[2:]
  	}

  	u, err := url.Parse(websiteURL)
  	if err != nil || u.Scheme == "" || u.Host == "" {
  		return websiteURL
  	}

  	// Only the scheme and host are needed, so there is no point in clearing the
  	// other URL fields before building the result.
  	return u.Scheme + "://" + u.Host + "/"
  }
  // IsHTTPS reports whether websiteURL parses successfully and has the "https"
  // scheme, compared case-insensitively.
  func IsHTTPS(websiteURL string) bool {
  	if u, err := url.Parse(websiteURL); err == nil {
  		return strings.EqualFold(u.Scheme, "https")
  	}
  	return false
  }
  // Domain returns the host component of websiteURL, including the port when
  // one is present. If websiteURL cannot be parsed, it is returned unchanged.
  func Domain(websiteURL string) string {
  	if u, err := url.Parse(websiteURL); err == nil {
  		return u.Host
  	}
  	return websiteURL
  }
  108. // DomainWithoutWWW returns the host component without a leading "www." prefix when present.
  109. func DomainWithoutWWW(websiteURL string) string {
  110. return strings.TrimPrefix(Domain(websiteURL), "www.")
  111. }
  // JoinBaseURLAndPath appends path to baseURL and returns the resulting URL
  // string. It returns an error when either argument is empty or when baseURL
  // cannot be parsed.
  func JoinBaseURLAndPath(baseURL, path string) (string, error) {
  	if baseURL == "" {
  		return "", errors.New("empty base URL")
  	}
  	if path == "" {
  		return "", errors.New("empty path")
  	}

  	parsedBaseURL, err := url.Parse(baseURL)
  	if err != nil {
  		return "", fmt.Errorf("invalid base URL: %w", err)
  	}

  	// Join on the already-parsed URL: url.JoinPath would parse baseURL a second
  	// time, and its only failure mode is that same parse error.
  	return parsedBaseURL.JoinPath(path).String(), nil
  }
  // IsNonPublicIP reports whether ip is private, loopback, link-local,
  // multicast, or unspecified. A nil or malformed address (one that is neither
  // 4 nor 16 bytes long) is also reported as non-public, so that an address
  // that cannot be classified is never mistaken for a public one.
  func IsNonPublicIP(ip net.IP) bool {
  	// To16 returns nil for a nil IP and for any slice of invalid length.
  	if ip.To16() == nil {
  		return true
  	}

  	return ip.IsPrivate() ||
  		ip.IsLoopback() ||
  		ip.IsLinkLocalUnicast() ||
  		ip.IsLinkLocalMulticast() ||
  		ip.IsMulticast() ||
  		ip.IsUnspecified()
  }
  142. }