request_builder.go 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package fetcher // import "miniflux.app/v2/internal/reader/fetcher"
  4. import (
  5. "context"
  6. "crypto/tls"
  7. "encoding/base64"
  8. "errors"
  9. "fmt"
  10. "log/slog"
  11. "net"
  12. "net/http"
  13. "net/url"
  14. "slices"
  15. "strings"
  16. "syscall"
  17. "time"
  18. "miniflux.app/v2/internal/config"
  19. "miniflux.app/v2/internal/proxyrotator"
  20. "miniflux.app/v2/internal/urllib"
  21. )
  // defaultHTTPClientTimeout is the overall request timeout applied to builders
  // created by NewRequestBuilder until WithTimeout overrides it.
  const defaultHTTPClientTimeout = 20 * time.Second

  // defaultAcceptHeader is the Accept header value sent when the caller has not
  // provided one. Feed formats are listed first, with a catch-all at lower
  // priority.
  const defaultAcceptHeader = "application/xml,application/atom+xml,application/rss+xml,application/rdf+xml,application/feed+json,text/html,*/*;q=0.9"
  // Sentinel errors exposed by the fetcher; match them with errors.Is.
  var (
  	// ErrHostnameResolution reports that the request hostname could not be resolved.
  	ErrHostnameResolution = errors.New("fetcher: unable to resolve request hostname")

  	// ErrPrivateNetworkHost reports that a connection was refused because the
  	// target address is not a public one.
  	ErrPrivateNetworkHost = errors.New("fetcher: refusing to access private network host")
  )
  30. type RequestBuilder struct {
  31. headers http.Header
  32. clientProxyURL *url.URL
  33. clientTimeout time.Duration
  34. useClientProxy bool
  35. withoutRedirects bool
  36. ignoreTLSErrors bool
  37. disableHTTP2 bool
  38. disableCompression bool
  39. proxyRotator *proxyrotator.ProxyRotator
  40. feedProxyURL string
  41. }
  42. func NewRequestBuilder() *RequestBuilder {
  43. return &RequestBuilder{
  44. headers: make(http.Header),
  45. clientTimeout: defaultHTTPClientTimeout,
  46. }
  47. }
  48. // Clone returns an independent copy of the builder. Mutating the copy (for
  49. // example to disable redirects for a single request) leaves the original
  50. // untouched.
  51. func (r *RequestBuilder) Clone() *RequestBuilder {
  52. clone := *r
  53. clone.headers = r.headers.Clone()
  54. return &clone
  55. }
  56. func (r *RequestBuilder) WithHeader(key, value string) *RequestBuilder {
  57. r.headers.Set(key, value)
  58. return r
  59. }
  60. func (r *RequestBuilder) WithETag(etag string) *RequestBuilder {
  61. if etag != "" {
  62. r.headers.Set("If-None-Match", etag)
  63. }
  64. return r
  65. }
  66. func (r *RequestBuilder) WithLastModified(lastModified string) *RequestBuilder {
  67. if lastModified != "" {
  68. r.headers.Set("If-Modified-Since", lastModified)
  69. }
  70. return r
  71. }
  72. func (r *RequestBuilder) WithUserAgent(userAgent string, defaultUserAgent string) *RequestBuilder {
  73. if userAgent != "" {
  74. r.headers.Set("User-Agent", userAgent)
  75. } else {
  76. r.headers.Set("User-Agent", defaultUserAgent)
  77. }
  78. return r
  79. }
  80. func (r *RequestBuilder) WithCookie(cookie string) *RequestBuilder {
  81. if cookie != "" {
  82. r.headers.Set("Cookie", cookie)
  83. }
  84. return r
  85. }
  86. func (r *RequestBuilder) WithUsernameAndPassword(username, password string) *RequestBuilder {
  87. if username != "" && password != "" {
  88. r.headers.Set("Authorization", "Basic "+base64.StdEncoding.EncodeToString([]byte(username+":"+password)))
  89. }
  90. return r
  91. }
  92. func (r *RequestBuilder) WithProxyRotator(proxyRotator *proxyrotator.ProxyRotator) *RequestBuilder {
  93. r.proxyRotator = proxyRotator
  94. return r
  95. }
  96. func (r *RequestBuilder) WithCustomApplicationProxyURL(proxyURL *url.URL) *RequestBuilder {
  97. r.clientProxyURL = proxyURL
  98. return r
  99. }
  100. func (r *RequestBuilder) UseCustomApplicationProxyURL(value bool) *RequestBuilder {
  101. r.useClientProxy = value
  102. return r
  103. }
  104. func (r *RequestBuilder) WithCustomFeedProxyURL(proxyURL string) *RequestBuilder {
  105. r.feedProxyURL = proxyURL
  106. return r
  107. }
  108. func (r *RequestBuilder) WithTimeout(timeout time.Duration) *RequestBuilder {
  109. r.clientTimeout = timeout
  110. return r
  111. }
  112. func (r *RequestBuilder) WithoutRedirects() *RequestBuilder {
  113. r.withoutRedirects = true
  114. return r
  115. }
  116. func (r *RequestBuilder) DisableHTTP2(value bool) *RequestBuilder {
  117. r.disableHTTP2 = value
  118. return r
  119. }
  120. func (r *RequestBuilder) IgnoreTLSErrors(value bool) *RequestBuilder {
  121. r.ignoreTLSErrors = value
  122. return r
  123. }
  124. func (r *RequestBuilder) WithoutCompression() *RequestBuilder {
  125. r.disableCompression = true
  126. return r
  127. }
  128. func (r *RequestBuilder) ExecuteRequest(requestURL string) (*http.Response, error) {
  129. var clientProxyURL *url.URL
  130. switch {
  131. case r.feedProxyURL != "":
  132. var err error
  133. clientProxyURL, err = url.Parse(r.feedProxyURL)
  134. if err != nil {
  135. return nil, fmt.Errorf(`fetcher: invalid feed proxy URL %q: %w`, r.feedProxyURL, err)
  136. }
  137. case r.useClientProxy && r.clientProxyURL != nil:
  138. clientProxyURL = r.clientProxyURL
  139. case r.proxyRotator != nil && r.proxyRotator.HasProxies():
  140. clientProxyURL = r.proxyRotator.GetNextProxy()
  141. }
  142. directDialer := &net.Dialer{
  143. Timeout: 10 * time.Second, // Default is 30s.
  144. KeepAlive: 15 * time.Second, // Default is 30s.
  145. }
  146. proxyDialer := &net.Dialer{
  147. Timeout: 10 * time.Second, // Default is 30s.
  148. KeepAlive: 15 * time.Second, // Default is 30s.
  149. }
  150. proxyDialAddress := normalizeProxyDialAddress(clientProxyURL)
  151. // Perform the private-network check inside the dialer's Control callback,
  152. // which fires after DNS resolution but before the TCP connection is made.
  153. // This eliminates TOCTOU / DNS-rebinding vulnerabilities: the resolved IP
  154. // that is checked is exactly the IP that will be connected to.
  155. allowPrivateNetworks := config.Opts == nil || config.Opts.FetcherAllowPrivateNetworks()
  156. if !allowPrivateNetworks {
  157. directDialer.Control = func(network, address string, c syscall.RawConn) error {
  158. host, _, err := net.SplitHostPort(address)
  159. if err != nil {
  160. return err
  161. }
  162. ip := net.ParseIP(host)
  163. if urllib.IsNonPublicIP(ip) {
  164. return fmt.Errorf("%w %q", ErrPrivateNetworkHost, host)
  165. }
  166. return nil
  167. }
  168. }
  169. transport := &http.Transport{
  170. Proxy: http.ProxyFromEnvironment,
  171. // Setting `DialContext` disables HTTP/2, this option forces the transport to try HTTP/2 regardless.
  172. ForceAttemptHTTP2: true,
  173. MaxIdleConns: 50, // Default is 100.
  174. IdleConnTimeout: 10 * time.Second, // Default is 90s.
  175. }
  176. transport.DialContext = directDialer.DialContext
  177. if !allowPrivateNetworks && proxyDialAddress != "" {
  178. // Explicitly configured proxies are a trusted hop. Keep the private-network
  179. // check for direct requests and redirects, but allow the connection to the proxy itself.
  180. transport.DialContext = func(ctx context.Context, network, addr string) (net.Conn, error) {
  181. if normalizeDialAddress(addr) == proxyDialAddress {
  182. return proxyDialer.DialContext(ctx, network, addr)
  183. }
  184. return directDialer.DialContext(ctx, network, addr)
  185. }
  186. }
  187. if r.ignoreTLSErrors {
  188. // Add insecure ciphers if we are ignoring TLS errors. This allows to connect to badly configured servers anyway
  189. ciphers := slices.Concat(tls.CipherSuites(), tls.InsecureCipherSuites())
  190. cipherSuites := make([]uint16, 0, len(ciphers))
  191. for _, cipher := range ciphers {
  192. cipherSuites = append(cipherSuites, cipher.ID)
  193. }
  194. transport.TLSClientConfig = &tls.Config{
  195. CipherSuites: cipherSuites,
  196. InsecureSkipVerify: true,
  197. }
  198. }
  199. if r.disableHTTP2 {
  200. transport.ForceAttemptHTTP2 = false
  201. // https://pkg.go.dev/net/http#hdr-HTTP_2
  202. // Programs that must disable HTTP/2 can do so by setting [Transport.TLSNextProto] (for clients) or [Server.TLSNextProto] (for servers) to a non-nil, empty map.
  203. transport.TLSNextProto = map[string]func(string, *tls.Conn) http.RoundTripper{}
  204. }
  205. var clientProxyURLRedacted string
  206. if clientProxyURL != nil {
  207. transport.Proxy = http.ProxyURL(clientProxyURL)
  208. clientProxyURLRedacted = clientProxyURL.Redacted()
  209. }
  210. client := &http.Client{
  211. Timeout: r.clientTimeout,
  212. }
  213. if r.withoutRedirects {
  214. client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
  215. return http.ErrUseLastResponse
  216. }
  217. }
  218. client.Transport = transport
  219. req, err := http.NewRequest("GET", requestURL, nil)
  220. if err != nil {
  221. return nil, err
  222. }
  223. req.Header = r.headers
  224. if r.disableCompression {
  225. req.Header.Set("Accept-Encoding", "identity")
  226. } else {
  227. req.Header.Set("Accept-Encoding", "br,gzip")
  228. }
  229. // Set default Accept header if not already set.
  230. // Note that for the media proxy requests, we need to forward the browser Accept header.
  231. if req.Header.Get("Accept") == "" {
  232. req.Header.Set("Accept", defaultAcceptHeader)
  233. }
  234. req.Header.Set("Connection", "close")
  235. slog.Debug("Making outgoing request", slog.Group("request",
  236. slog.String("method", req.Method),
  237. slog.String("url", req.URL.String()),
  238. slog.Any("headers", req.Header),
  239. slog.Bool("without_redirects", r.withoutRedirects),
  240. slog.Bool("use_app_client_proxy", r.useClientProxy),
  241. slog.String("client_proxy_url", clientProxyURLRedacted),
  242. slog.Bool("ignore_tls_errors", r.ignoreTLSErrors),
  243. slog.Bool("disable_http2", r.disableHTTP2),
  244. ))
  245. return client.Do(req)
  246. }
  // normalizeDialAddress returns addr as a "host:port" string with the host
  // lower-cased, so that dial addresses can be compared case-insensitively.
  // It returns an empty string when addr cannot be split into host and port.
  func normalizeDialAddress(addr string) string {
  	if host, port, err := net.SplitHostPort(addr); err == nil {
  		return net.JoinHostPort(strings.ToLower(host), port)
  	}
  	return ""
  }
  // normalizeProxyDialAddress returns the lower-cased "host:port" form of
  // proxyURL, for comparison against the address handed to the transport's
  // dialer.
  //
  // When the URL carries no explicit port, the scheme decides the default:
  // 80 for http or an empty scheme, 443 for https, 1080 for socks5/socks5h.
  // It returns an empty string for a nil URL, and for a URL that has neither
  // an explicit port nor one of those schemes.
  func normalizeProxyDialAddress(proxyURL *url.URL) string {
  	if proxyURL == nil {
  		return ""
  	}

  	host := strings.ToLower(proxyURL.Hostname())
  	if explicitPort := proxyURL.Port(); explicitPort != "" {
  		return net.JoinHostPort(host, explicitPort)
  	}

  	var defaultPort string
  	switch strings.ToLower(proxyURL.Scheme) {
  	case "socks5", "socks5h":
  		defaultPort = "1080"
  	case "https":
  		defaultPort = "443"
  	case "http", "":
  		defaultPort = "80"
  	default:
  		return ""
  	}

  	return net.JoinHostPort(host, defaultPort)
  }