request_builder.go 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package fetcher // import "miniflux.app/v2/internal/reader/fetcher"
  4. import (
  5. "context"
  6. "crypto/tls"
  7. "encoding/base64"
  8. "errors"
  9. "fmt"
  10. "log/slog"
  11. "net"
  12. "net/http"
  13. "net/url"
  14. "slices"
  15. "strings"
  16. "syscall"
  17. "time"
  18. "miniflux.app/v2/internal/config"
  19. "miniflux.app/v2/internal/proxyrotator"
  20. "miniflux.app/v2/internal/urllib"
  21. )
  22. const (
  23. defaultHTTPClientTimeout = 20 * time.Second
  24. defaultAcceptHeader = "application/xml,application/atom+xml,application/rss+xml,application/rdf+xml,application/feed+json,text/html,*/*;q=0.9"
  25. )
  26. var (
  27. ErrHostnameResolution = errors.New("fetcher: unable to resolve request hostname")
  28. ErrPrivateNetworkHost = errors.New("fetcher: refusing to access private network host")
  29. )
  30. type RequestBuilder struct {
  31. headers http.Header
  32. clientProxyURL *url.URL
  33. clientTimeout time.Duration
  34. useClientProxy bool
  35. withoutRedirects bool
  36. ignoreTLSErrors bool
  37. disableHTTP2 bool
  38. disableCompression bool
  39. proxyRotator *proxyrotator.ProxyRotator
  40. feedProxyURL string
  41. }
  42. func NewRequestBuilder() *RequestBuilder {
  43. return &RequestBuilder{
  44. headers: make(http.Header),
  45. clientTimeout: defaultHTTPClientTimeout,
  46. }
  47. }
  48. func (r *RequestBuilder) WithHeader(key, value string) *RequestBuilder {
  49. r.headers.Set(key, value)
  50. return r
  51. }
  52. func (r *RequestBuilder) WithETag(etag string) *RequestBuilder {
  53. if etag != "" {
  54. r.headers.Set("If-None-Match", etag)
  55. }
  56. return r
  57. }
  58. func (r *RequestBuilder) WithLastModified(lastModified string) *RequestBuilder {
  59. if lastModified != "" {
  60. r.headers.Set("If-Modified-Since", lastModified)
  61. }
  62. return r
  63. }
  64. func (r *RequestBuilder) WithUserAgent(userAgent string, defaultUserAgent string) *RequestBuilder {
  65. if userAgent != "" {
  66. r.headers.Set("User-Agent", userAgent)
  67. } else {
  68. r.headers.Set("User-Agent", defaultUserAgent)
  69. }
  70. return r
  71. }
  72. func (r *RequestBuilder) WithCookie(cookie string) *RequestBuilder {
  73. if cookie != "" {
  74. r.headers.Set("Cookie", cookie)
  75. }
  76. return r
  77. }
  78. func (r *RequestBuilder) WithUsernameAndPassword(username, password string) *RequestBuilder {
  79. if username != "" && password != "" {
  80. r.headers.Set("Authorization", "Basic "+base64.StdEncoding.EncodeToString([]byte(username+":"+password)))
  81. }
  82. return r
  83. }
  84. func (r *RequestBuilder) WithProxyRotator(proxyRotator *proxyrotator.ProxyRotator) *RequestBuilder {
  85. r.proxyRotator = proxyRotator
  86. return r
  87. }
  88. func (r *RequestBuilder) WithCustomApplicationProxyURL(proxyURL *url.URL) *RequestBuilder {
  89. r.clientProxyURL = proxyURL
  90. return r
  91. }
  92. func (r *RequestBuilder) UseCustomApplicationProxyURL(value bool) *RequestBuilder {
  93. r.useClientProxy = value
  94. return r
  95. }
  96. func (r *RequestBuilder) WithCustomFeedProxyURL(proxyURL string) *RequestBuilder {
  97. r.feedProxyURL = proxyURL
  98. return r
  99. }
  100. func (r *RequestBuilder) WithTimeout(timeout time.Duration) *RequestBuilder {
  101. r.clientTimeout = timeout
  102. return r
  103. }
  104. func (r *RequestBuilder) WithoutRedirects() *RequestBuilder {
  105. r.withoutRedirects = true
  106. return r
  107. }
  108. func (r *RequestBuilder) DisableHTTP2(value bool) *RequestBuilder {
  109. r.disableHTTP2 = value
  110. return r
  111. }
  112. func (r *RequestBuilder) IgnoreTLSErrors(value bool) *RequestBuilder {
  113. r.ignoreTLSErrors = value
  114. return r
  115. }
  116. func (r *RequestBuilder) WithoutCompression() *RequestBuilder {
  117. r.disableCompression = true
  118. return r
  119. }
  120. func (r *RequestBuilder) ExecuteRequest(requestURL string) (*http.Response, error) {
  121. var clientProxyURL *url.URL
  122. switch {
  123. case r.feedProxyURL != "":
  124. var err error
  125. clientProxyURL, err = url.Parse(r.feedProxyURL)
  126. if err != nil {
  127. return nil, fmt.Errorf(`fetcher: invalid feed proxy URL %q: %w`, r.feedProxyURL, err)
  128. }
  129. case r.useClientProxy && r.clientProxyURL != nil:
  130. clientProxyURL = r.clientProxyURL
  131. case r.proxyRotator != nil && r.proxyRotator.HasProxies():
  132. clientProxyURL = r.proxyRotator.GetNextProxy()
  133. }
  134. directDialer := &net.Dialer{
  135. Timeout: 10 * time.Second, // Default is 30s.
  136. KeepAlive: 15 * time.Second, // Default is 30s.
  137. }
  138. proxyDialer := &net.Dialer{
  139. Timeout: 10 * time.Second, // Default is 30s.
  140. KeepAlive: 15 * time.Second, // Default is 30s.
  141. }
  142. proxyDialAddress := normalizeProxyDialAddress(clientProxyURL)
  143. // Perform the private-network check inside the dialer's Control callback,
  144. // which fires after DNS resolution but before the TCP connection is made.
  145. // This eliminates TOCTOU / DNS-rebinding vulnerabilities: the resolved IP
  146. // that is checked is exactly the IP that will be connected to.
  147. allowPrivateNetworks := config.Opts == nil || config.Opts.FetcherAllowPrivateNetworks()
  148. if !allowPrivateNetworks {
  149. directDialer.Control = func(network, address string, c syscall.RawConn) error {
  150. host, _, err := net.SplitHostPort(address)
  151. if err != nil {
  152. return err
  153. }
  154. ip := net.ParseIP(host)
  155. if urllib.IsNonPublicIP(ip) {
  156. return fmt.Errorf("%w %q", ErrPrivateNetworkHost, host)
  157. }
  158. return nil
  159. }
  160. }
  161. transport := &http.Transport{
  162. Proxy: http.ProxyFromEnvironment,
  163. // Setting `DialContext` disables HTTP/2, this option forces the transport to try HTTP/2 regardless.
  164. ForceAttemptHTTP2: true,
  165. MaxIdleConns: 50, // Default is 100.
  166. IdleConnTimeout: 10 * time.Second, // Default is 90s.
  167. }
  168. transport.DialContext = directDialer.DialContext
  169. if !allowPrivateNetworks && proxyDialAddress != "" {
  170. // Explicitly configured proxies are a trusted hop. Keep the private-network
  171. // check for direct requests and redirects, but allow the connection to the proxy itself.
  172. transport.DialContext = func(ctx context.Context, network, addr string) (net.Conn, error) {
  173. if normalizeDialAddress(addr) == proxyDialAddress {
  174. return proxyDialer.DialContext(ctx, network, addr)
  175. }
  176. return directDialer.DialContext(ctx, network, addr)
  177. }
  178. }
  179. if r.ignoreTLSErrors {
  180. // Add insecure ciphers if we are ignoring TLS errors. This allows to connect to badly configured servers anyway
  181. ciphers := slices.Concat(tls.CipherSuites(), tls.InsecureCipherSuites())
  182. cipherSuites := make([]uint16, 0, len(ciphers))
  183. for _, cipher := range ciphers {
  184. cipherSuites = append(cipherSuites, cipher.ID)
  185. }
  186. transport.TLSClientConfig = &tls.Config{
  187. CipherSuites: cipherSuites,
  188. InsecureSkipVerify: true,
  189. }
  190. }
  191. if r.disableHTTP2 {
  192. transport.ForceAttemptHTTP2 = false
  193. // https://pkg.go.dev/net/http#hdr-HTTP_2
  194. // Programs that must disable HTTP/2 can do so by setting [Transport.TLSNextProto] (for clients) or [Server.TLSNextProto] (for servers) to a non-nil, empty map.
  195. transport.TLSNextProto = map[string]func(string, *tls.Conn) http.RoundTripper{}
  196. }
  197. var clientProxyURLRedacted string
  198. if clientProxyURL != nil {
  199. transport.Proxy = http.ProxyURL(clientProxyURL)
  200. clientProxyURLRedacted = clientProxyURL.Redacted()
  201. }
  202. client := &http.Client{
  203. Timeout: r.clientTimeout,
  204. }
  205. if r.withoutRedirects {
  206. client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
  207. return http.ErrUseLastResponse
  208. }
  209. }
  210. client.Transport = transport
  211. req, err := http.NewRequest("GET", requestURL, nil)
  212. if err != nil {
  213. return nil, err
  214. }
  215. req.Header = r.headers
  216. if r.disableCompression {
  217. req.Header.Set("Accept-Encoding", "identity")
  218. } else {
  219. req.Header.Set("Accept-Encoding", "br,gzip")
  220. }
  221. // Set default Accept header if not already set.
  222. // Note that for the media proxy requests, we need to forward the browser Accept header.
  223. if req.Header.Get("Accept") == "" {
  224. req.Header.Set("Accept", defaultAcceptHeader)
  225. }
  226. req.Header.Set("Connection", "close")
  227. slog.Debug("Making outgoing request", slog.Group("request",
  228. slog.String("method", req.Method),
  229. slog.String("url", req.URL.String()),
  230. slog.Any("headers", req.Header),
  231. slog.Bool("without_redirects", r.withoutRedirects),
  232. slog.Bool("use_app_client_proxy", r.useClientProxy),
  233. slog.String("client_proxy_url", clientProxyURLRedacted),
  234. slog.Bool("ignore_tls_errors", r.ignoreTLSErrors),
  235. slog.Bool("disable_http2", r.disableHTTP2),
  236. ))
  237. return client.Do(req)
  238. }
  239. func normalizeDialAddress(addr string) string {
  240. host, port, err := net.SplitHostPort(addr)
  241. if err != nil {
  242. return ""
  243. }
  244. return net.JoinHostPort(strings.ToLower(host), port)
  245. }
  246. func normalizeProxyDialAddress(proxyURL *url.URL) string {
  247. if proxyURL == nil {
  248. return ""
  249. }
  250. port := proxyURL.Port()
  251. if port == "" {
  252. switch strings.ToLower(proxyURL.Scheme) {
  253. case "", "http":
  254. port = "80"
  255. case "https":
  256. port = "443"
  257. case "socks5", "socks5h":
  258. port = "1080"
  259. default:
  260. return ""
  261. }
  262. }
  263. return net.JoinHostPort(strings.ToLower(proxyURL.Hostname()), port)
  264. }