request_builder.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package fetcher // import "miniflux.app/v2/internal/reader/fetcher"
  4. import (
  5. "crypto/tls"
  6. "encoding/base64"
  7. "fmt"
  8. "log/slog"
  9. "net"
  10. "net/http"
  11. "net/url"
  12. "slices"
  13. "time"
  14. "miniflux.app/v2/internal/proxyrotator"
  15. )
  // Defaults applied to outgoing requests when the caller does not override them.
  const (
  	// defaultHTTPClientTimeout is the overall client timeout used by a
  	// builder created with NewRequestBuilder.
  	defaultHTTPClientTimeout = 20 * time.Second

  	// defaultAcceptHeader is sent as the Accept header when the request does
  	// not already carry one.
  	defaultAcceptHeader = "application/xml, application/atom+xml, application/rss+xml, application/rdf+xml, application/feed+json, text/html, */*;q=0.9"
  )
  20. type RequestBuilder struct {
  21. headers http.Header
  22. clientProxyURL *url.URL
  23. clientTimeout time.Duration
  24. useClientProxy bool
  25. withoutRedirects bool
  26. ignoreTLSErrors bool
  27. disableHTTP2 bool
  28. disableCompression bool
  29. proxyRotator *proxyrotator.ProxyRotator
  30. feedProxyURL string
  31. }
  32. func NewRequestBuilder() *RequestBuilder {
  33. return &RequestBuilder{
  34. headers: make(http.Header),
  35. clientTimeout: defaultHTTPClientTimeout,
  36. }
  37. }
  38. func (r *RequestBuilder) WithHeader(key, value string) *RequestBuilder {
  39. r.headers.Set(key, value)
  40. return r
  41. }
  42. func (r *RequestBuilder) WithETag(etag string) *RequestBuilder {
  43. if etag != "" {
  44. r.headers.Set("If-None-Match", etag)
  45. }
  46. return r
  47. }
  48. func (r *RequestBuilder) WithLastModified(lastModified string) *RequestBuilder {
  49. if lastModified != "" {
  50. r.headers.Set("If-Modified-Since", lastModified)
  51. }
  52. return r
  53. }
  54. func (r *RequestBuilder) WithUserAgent(userAgent string, defaultUserAgent string) *RequestBuilder {
  55. if userAgent != "" {
  56. r.headers.Set("User-Agent", userAgent)
  57. } else {
  58. r.headers.Set("User-Agent", defaultUserAgent)
  59. }
  60. return r
  61. }
  62. func (r *RequestBuilder) WithCookie(cookie string) *RequestBuilder {
  63. if cookie != "" {
  64. r.headers.Set("Cookie", cookie)
  65. }
  66. return r
  67. }
  68. func (r *RequestBuilder) WithUsernameAndPassword(username, password string) *RequestBuilder {
  69. if username != "" && password != "" {
  70. r.headers.Set("Authorization", "Basic "+base64.StdEncoding.EncodeToString([]byte(username+":"+password)))
  71. }
  72. return r
  73. }
  74. func (r *RequestBuilder) WithProxyRotator(proxyRotator *proxyrotator.ProxyRotator) *RequestBuilder {
  75. r.proxyRotator = proxyRotator
  76. return r
  77. }
  78. func (r *RequestBuilder) WithCustomApplicationProxyURL(proxyURL *url.URL) *RequestBuilder {
  79. r.clientProxyURL = proxyURL
  80. return r
  81. }
  82. func (r *RequestBuilder) UseCustomApplicationProxyURL(value bool) *RequestBuilder {
  83. r.useClientProxy = value
  84. return r
  85. }
  86. func (r *RequestBuilder) WithCustomFeedProxyURL(proxyURL string) *RequestBuilder {
  87. r.feedProxyURL = proxyURL
  88. return r
  89. }
  90. func (r *RequestBuilder) WithTimeout(timeout time.Duration) *RequestBuilder {
  91. r.clientTimeout = timeout
  92. return r
  93. }
  94. func (r *RequestBuilder) WithoutRedirects() *RequestBuilder {
  95. r.withoutRedirects = true
  96. return r
  97. }
  98. func (r *RequestBuilder) DisableHTTP2(value bool) *RequestBuilder {
  99. r.disableHTTP2 = value
  100. return r
  101. }
  102. func (r *RequestBuilder) IgnoreTLSErrors(value bool) *RequestBuilder {
  103. r.ignoreTLSErrors = value
  104. return r
  105. }
  106. func (r *RequestBuilder) WithoutCompression() *RequestBuilder {
  107. r.disableCompression = true
  108. return r
  109. }
  110. func (r *RequestBuilder) ExecuteRequest(requestURL string) (*http.Response, error) {
  111. transport := &http.Transport{
  112. Proxy: http.ProxyFromEnvironment,
  113. // Setting `DialContext` disables HTTP/2, this option forces the transport to try HTTP/2 regardless.
  114. ForceAttemptHTTP2: true,
  115. DialContext: (&net.Dialer{
  116. Timeout: 10 * time.Second, // Default is 30s.
  117. KeepAlive: 15 * time.Second, // Default is 30s.
  118. }).DialContext,
  119. MaxIdleConns: 50, // Default is 100.
  120. IdleConnTimeout: 10 * time.Second, // Default is 90s.
  121. }
  122. if r.ignoreTLSErrors {
  123. // Add insecure ciphers if we are ignoring TLS errors. This allows to connect to badly configured servers anyway
  124. ciphers := slices.Concat(tls.CipherSuites(), tls.InsecureCipherSuites())
  125. cipherSuites := make([]uint16, 0, len(ciphers))
  126. for _, cipher := range ciphers {
  127. cipherSuites = append(cipherSuites, cipher.ID)
  128. }
  129. transport.TLSClientConfig = &tls.Config{
  130. CipherSuites: cipherSuites,
  131. InsecureSkipVerify: true,
  132. }
  133. }
  134. if r.disableHTTP2 {
  135. transport.ForceAttemptHTTP2 = false
  136. // https://pkg.go.dev/net/http#hdr-HTTP_2
  137. // Programs that must disable HTTP/2 can do so by setting [Transport.TLSNextProto] (for clients) or [Server.TLSNextProto] (for servers) to a non-nil, empty map.
  138. transport.TLSNextProto = map[string]func(string, *tls.Conn) http.RoundTripper{}
  139. }
  140. var clientProxyURL *url.URL
  141. switch {
  142. case r.feedProxyURL != "":
  143. var err error
  144. clientProxyURL, err = url.Parse(r.feedProxyURL)
  145. if err != nil {
  146. return nil, fmt.Errorf(`fetcher: invalid feed proxy URL %q: %w`, r.feedProxyURL, err)
  147. }
  148. case r.useClientProxy && r.clientProxyURL != nil:
  149. clientProxyURL = r.clientProxyURL
  150. case r.proxyRotator != nil && r.proxyRotator.HasProxies():
  151. clientProxyURL = r.proxyRotator.GetNextProxy()
  152. }
  153. var clientProxyURLRedacted string
  154. if clientProxyURL != nil {
  155. transport.Proxy = http.ProxyURL(clientProxyURL)
  156. clientProxyURLRedacted = clientProxyURL.Redacted()
  157. }
  158. client := &http.Client{
  159. Timeout: r.clientTimeout,
  160. }
  161. if r.withoutRedirects {
  162. client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
  163. return http.ErrUseLastResponse
  164. }
  165. }
  166. client.Transport = transport
  167. req, err := http.NewRequest("GET", requestURL, nil)
  168. if err != nil {
  169. return nil, err
  170. }
  171. req.Header = r.headers
  172. if r.disableCompression {
  173. req.Header.Set("Accept-Encoding", "identity")
  174. } else {
  175. req.Header.Set("Accept-Encoding", "br, gzip")
  176. }
  177. // Set default Accept header if not already set.
  178. // Note that for the media proxy requests, we need to forward the browser Accept header.
  179. if req.Header.Get("Accept") == "" {
  180. req.Header.Set("Accept", defaultAcceptHeader)
  181. }
  182. req.Header.Set("Connection", "close")
  183. slog.Debug("Making outgoing request", slog.Group("request",
  184. slog.String("method", req.Method),
  185. slog.String("url", req.URL.String()),
  186. slog.Any("headers", req.Header),
  187. slog.Bool("without_redirects", r.withoutRedirects),
  188. slog.Bool("use_app_client_proxy", r.useClientProxy),
  189. slog.String("client_proxy_url", clientProxyURLRedacted),
  190. slog.Bool("ignore_tls_errors", r.ignoreTLSErrors),
  191. slog.Bool("disable_http2", r.disableHTTP2),
  192. ))
  193. return client.Do(req)
  194. }