4
0

request_builder.go 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package fetcher // import "miniflux.app/v2/internal/reader/fetcher"
  4. import (
  5. "crypto/tls"
  6. "encoding/base64"
  7. "errors"
  8. "fmt"
  9. "log/slog"
  10. "net"
  11. "net/http"
  12. "net/url"
  13. "slices"
  14. "syscall"
  15. "time"
  16. "miniflux.app/v2/internal/config"
  17. "miniflux.app/v2/internal/proxyrotator"
  18. "miniflux.app/v2/internal/urllib"
  19. )
  20. const (
  21. defaultHTTPClientTimeout = 20 * time.Second
  22. defaultAcceptHeader = "application/xml,application/atom+xml,application/rss+xml,application/rdf+xml,application/feed+json,text/html,*/*;q=0.9"
  23. )
  24. var (
  25. ErrHostnameResolution = errors.New("fetcher: unable to resolve request hostname")
  26. ErrPrivateNetworkHost = errors.New("fetcher: refusing to access private network host")
  27. )
  28. type RequestBuilder struct {
  29. headers http.Header
  30. clientProxyURL *url.URL
  31. clientTimeout time.Duration
  32. useClientProxy bool
  33. withoutRedirects bool
  34. ignoreTLSErrors bool
  35. disableHTTP2 bool
  36. disableCompression bool
  37. proxyRotator *proxyrotator.ProxyRotator
  38. feedProxyURL string
  39. }
  40. func NewRequestBuilder() *RequestBuilder {
  41. return &RequestBuilder{
  42. headers: make(http.Header),
  43. clientTimeout: defaultHTTPClientTimeout,
  44. }
  45. }
  46. func (r *RequestBuilder) WithHeader(key, value string) *RequestBuilder {
  47. r.headers.Set(key, value)
  48. return r
  49. }
  50. func (r *RequestBuilder) WithETag(etag string) *RequestBuilder {
  51. if etag != "" {
  52. r.headers.Set("If-None-Match", etag)
  53. }
  54. return r
  55. }
  56. func (r *RequestBuilder) WithLastModified(lastModified string) *RequestBuilder {
  57. if lastModified != "" {
  58. r.headers.Set("If-Modified-Since", lastModified)
  59. }
  60. return r
  61. }
  62. func (r *RequestBuilder) WithUserAgent(userAgent string, defaultUserAgent string) *RequestBuilder {
  63. if userAgent != "" {
  64. r.headers.Set("User-Agent", userAgent)
  65. } else {
  66. r.headers.Set("User-Agent", defaultUserAgent)
  67. }
  68. return r
  69. }
  70. func (r *RequestBuilder) WithCookie(cookie string) *RequestBuilder {
  71. if cookie != "" {
  72. r.headers.Set("Cookie", cookie)
  73. }
  74. return r
  75. }
  76. func (r *RequestBuilder) WithUsernameAndPassword(username, password string) *RequestBuilder {
  77. if username != "" && password != "" {
  78. r.headers.Set("Authorization", "Basic "+base64.StdEncoding.EncodeToString([]byte(username+":"+password)))
  79. }
  80. return r
  81. }
  82. func (r *RequestBuilder) WithProxyRotator(proxyRotator *proxyrotator.ProxyRotator) *RequestBuilder {
  83. r.proxyRotator = proxyRotator
  84. return r
  85. }
  86. func (r *RequestBuilder) WithCustomApplicationProxyURL(proxyURL *url.URL) *RequestBuilder {
  87. r.clientProxyURL = proxyURL
  88. return r
  89. }
  90. func (r *RequestBuilder) UseCustomApplicationProxyURL(value bool) *RequestBuilder {
  91. r.useClientProxy = value
  92. return r
  93. }
  94. func (r *RequestBuilder) WithCustomFeedProxyURL(proxyURL string) *RequestBuilder {
  95. r.feedProxyURL = proxyURL
  96. return r
  97. }
  98. func (r *RequestBuilder) WithTimeout(timeout time.Duration) *RequestBuilder {
  99. r.clientTimeout = timeout
  100. return r
  101. }
  102. func (r *RequestBuilder) WithoutRedirects() *RequestBuilder {
  103. r.withoutRedirects = true
  104. return r
  105. }
  106. func (r *RequestBuilder) DisableHTTP2(value bool) *RequestBuilder {
  107. r.disableHTTP2 = value
  108. return r
  109. }
  110. func (r *RequestBuilder) IgnoreTLSErrors(value bool) *RequestBuilder {
  111. r.ignoreTLSErrors = value
  112. return r
  113. }
  114. func (r *RequestBuilder) WithoutCompression() *RequestBuilder {
  115. r.disableCompression = true
  116. return r
  117. }
  118. func (r *RequestBuilder) ExecuteRequest(requestURL string) (*http.Response, error) {
  119. dialer := &net.Dialer{
  120. Timeout: 10 * time.Second, // Default is 30s.
  121. KeepAlive: 15 * time.Second, // Default is 30s.
  122. }
  123. // Perform the private-network check inside the dialer's Control callback,
  124. // which fires after DNS resolution but before the TCP connection is made.
  125. // This eliminates TOCTOU / DNS-rebinding vulnerabilities: the resolved IP
  126. // that is checked is exactly the IP that will be connected to.
  127. allowPrivateNetworks := config.Opts == nil || config.Opts.FetcherAllowPrivateNetworks()
  128. if !allowPrivateNetworks {
  129. dialer.Control = func(network, address string, c syscall.RawConn) error {
  130. host, _, err := net.SplitHostPort(address)
  131. if err != nil {
  132. return err
  133. }
  134. ip := net.ParseIP(host)
  135. if urllib.IsNonPublicIP(ip) {
  136. return fmt.Errorf("%w %q", ErrPrivateNetworkHost, host)
  137. }
  138. return nil
  139. }
  140. }
  141. transport := &http.Transport{
  142. Proxy: http.ProxyFromEnvironment,
  143. // Setting `DialContext` disables HTTP/2, this option forces the transport to try HTTP/2 regardless.
  144. ForceAttemptHTTP2: true,
  145. DialContext: dialer.DialContext,
  146. MaxIdleConns: 50, // Default is 100.
  147. IdleConnTimeout: 10 * time.Second, // Default is 90s.
  148. }
  149. if r.ignoreTLSErrors {
  150. // Add insecure ciphers if we are ignoring TLS errors. This allows to connect to badly configured servers anyway
  151. ciphers := slices.Concat(tls.CipherSuites(), tls.InsecureCipherSuites())
  152. cipherSuites := make([]uint16, 0, len(ciphers))
  153. for _, cipher := range ciphers {
  154. cipherSuites = append(cipherSuites, cipher.ID)
  155. }
  156. transport.TLSClientConfig = &tls.Config{
  157. CipherSuites: cipherSuites,
  158. InsecureSkipVerify: true,
  159. }
  160. }
  161. if r.disableHTTP2 {
  162. transport.ForceAttemptHTTP2 = false
  163. // https://pkg.go.dev/net/http#hdr-HTTP_2
  164. // Programs that must disable HTTP/2 can do so by setting [Transport.TLSNextProto] (for clients) or [Server.TLSNextProto] (for servers) to a non-nil, empty map.
  165. transport.TLSNextProto = map[string]func(string, *tls.Conn) http.RoundTripper{}
  166. }
  167. var clientProxyURL *url.URL
  168. switch {
  169. case r.feedProxyURL != "":
  170. var err error
  171. clientProxyURL, err = url.Parse(r.feedProxyURL)
  172. if err != nil {
  173. return nil, fmt.Errorf(`fetcher: invalid feed proxy URL %q: %w`, r.feedProxyURL, err)
  174. }
  175. case r.useClientProxy && r.clientProxyURL != nil:
  176. clientProxyURL = r.clientProxyURL
  177. case r.proxyRotator != nil && r.proxyRotator.HasProxies():
  178. clientProxyURL = r.proxyRotator.GetNextProxy()
  179. }
  180. var clientProxyURLRedacted string
  181. if clientProxyURL != nil {
  182. transport.Proxy = http.ProxyURL(clientProxyURL)
  183. clientProxyURLRedacted = clientProxyURL.Redacted()
  184. }
  185. client := &http.Client{
  186. Timeout: r.clientTimeout,
  187. }
  188. if r.withoutRedirects {
  189. client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
  190. return http.ErrUseLastResponse
  191. }
  192. }
  193. client.Transport = transport
  194. req, err := http.NewRequest("GET", requestURL, nil)
  195. if err != nil {
  196. return nil, err
  197. }
  198. req.Header = r.headers
  199. if r.disableCompression {
  200. req.Header.Set("Accept-Encoding", "identity")
  201. } else {
  202. req.Header.Set("Accept-Encoding", "br,gzip")
  203. }
  204. // Set default Accept header if not already set.
  205. // Note that for the media proxy requests, we need to forward the browser Accept header.
  206. if req.Header.Get("Accept") == "" {
  207. req.Header.Set("Accept", defaultAcceptHeader)
  208. }
  209. req.Header.Set("Connection", "close")
  210. slog.Debug("Making outgoing request", slog.Group("request",
  211. slog.String("method", req.Method),
  212. slog.String("url", req.URL.String()),
  213. slog.Any("headers", req.Header),
  214. slog.Bool("without_redirects", r.withoutRedirects),
  215. slog.Bool("use_app_client_proxy", r.useClientProxy),
  216. slog.String("client_proxy_url", clientProxyURLRedacted),
  217. slog.Bool("ignore_tls_errors", r.ignoreTLSErrors),
  218. slog.Bool("disable_http2", r.disableHTTP2),
  219. ))
  220. return client.Do(req)
  221. }