response_handler.go 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package fetcher // import "miniflux.app/v2/internal/reader/fetcher"
  4. import (
  5. "crypto/x509"
  6. "errors"
  7. "fmt"
  8. "io"
  9. "log/slog"
  10. "net"
  11. "net/http"
  12. "net/url"
  13. "os"
  14. "strconv"
  15. "strings"
  16. "time"
  17. "miniflux.app/v2/internal/locale"
  18. )
  // ResponseHandler bundles the two results of an HTTP client call: the
  // response and the error. Several methods guard against a nil response.
  type ResponseHandler struct {
  	// httpResponse is the response handed to NewResponseHandler; may be nil.
  	httpResponse *http.Response
  	// clientErr is the error handed to NewResponseHandler; nil on success.
  	clientErr error
  }
  23. func NewResponseHandler(httpResponse *http.Response, clientErr error) *ResponseHandler {
  24. return &ResponseHandler{httpResponse: httpResponse, clientErr: clientErr}
  25. }
  26. func (r *ResponseHandler) EffectiveURL() string {
  27. return r.httpResponse.Request.URL.String()
  28. }
  29. func (r *ResponseHandler) ContentType() string {
  30. return r.httpResponse.Header.Get("Content-Type")
  31. }
  32. func (r *ResponseHandler) LastModified() string {
  33. // Ignore caching headers for feeds that do not want any cache.
  34. if r.httpResponse.Header.Get("Expires") == "0" {
  35. return ""
  36. }
  37. return r.httpResponse.Header.Get("Last-Modified")
  38. }
  39. func (r *ResponseHandler) ETag() string {
  40. // Ignore caching headers for feeds that do not want any cache.
  41. if r.httpResponse.Header.Get("Expires") == "0" {
  42. return ""
  43. }
  44. return r.httpResponse.Header.Get("ETag")
  45. }
  46. func (r *ResponseHandler) Expires() time.Duration {
  47. expiresHeaderValue := r.httpResponse.Header.Get("Expires")
  48. if expiresHeaderValue != "" {
  49. t, err := time.Parse(time.RFC1123, expiresHeaderValue)
  50. if err == nil {
  51. // This rounds up to the next minute by rounding down and just adding a minute.
  52. return time.Until(t).Truncate(time.Minute) + time.Minute
  53. }
  54. }
  55. return 0
  56. }
  57. func (r *ResponseHandler) CacheControlMaxAge() time.Duration {
  58. cacheControlHeaderValue := r.httpResponse.Header.Get("Cache-Control")
  59. if cacheControlHeaderValue != "" {
  60. for _, directive := range strings.Split(cacheControlHeaderValue, ",") {
  61. directive = strings.TrimSpace(directive)
  62. if strings.HasPrefix(directive, "max-age=") {
  63. maxAge, err := strconv.Atoi(strings.TrimPrefix(directive, "max-age="))
  64. if err == nil {
  65. return time.Duration(maxAge) * time.Second
  66. }
  67. }
  68. }
  69. }
  70. return 0
  71. }
  72. func (r *ResponseHandler) ParseRetryDelay() time.Duration {
  73. retryAfterHeaderValue := r.httpResponse.Header.Get("Retry-After")
  74. if retryAfterHeaderValue != "" {
  75. // First, try to parse as an integer (number of seconds)
  76. if seconds, err := strconv.Atoi(retryAfterHeaderValue); err == nil {
  77. return time.Duration(seconds) * time.Second
  78. }
  79. // If not an integer, try to parse as an HTTP-date
  80. if t, err := time.Parse(time.RFC1123, retryAfterHeaderValue); err == nil {
  81. return time.Until(t).Truncate(time.Second)
  82. }
  83. }
  84. return 0
  85. }
  86. func (r *ResponseHandler) IsRateLimited() bool {
  87. return r.httpResponse != nil && r.httpResponse.StatusCode == http.StatusTooManyRequests
  88. }
  89. func (r *ResponseHandler) IsModified(lastEtagValue, lastModifiedValue string) bool {
  90. if r.httpResponse.StatusCode == http.StatusNotModified {
  91. return false
  92. }
  93. if r.ETag() != "" {
  94. return r.ETag() != lastEtagValue
  95. }
  96. if r.LastModified() != "" {
  97. return r.LastModified() != lastModifiedValue
  98. }
  99. return true
  100. }
  101. func (r *ResponseHandler) IsRedirect() bool {
  102. return r.httpResponse != nil &&
  103. (r.httpResponse.StatusCode == http.StatusMovedPermanently ||
  104. r.httpResponse.StatusCode == http.StatusFound ||
  105. r.httpResponse.StatusCode == http.StatusSeeOther ||
  106. r.httpResponse.StatusCode == http.StatusTemporaryRedirect ||
  107. r.httpResponse.StatusCode == http.StatusPermanentRedirect)
  108. }
  109. func (r *ResponseHandler) Close() {
  110. if r.httpResponse != nil && r.httpResponse.Body != nil && r.clientErr == nil {
  111. r.httpResponse.Body.Close()
  112. }
  113. }
  114. func (r *ResponseHandler) getReader(maxBodySize int64) io.ReadCloser {
  115. contentEncoding := strings.ToLower(r.httpResponse.Header.Get("Content-Encoding"))
  116. slog.Debug("Request response",
  117. slog.String("effective_url", r.EffectiveURL()),
  118. slog.String("content_length", r.httpResponse.Header.Get("Content-Length")),
  119. slog.String("content_encoding", contentEncoding),
  120. slog.String("content_type", r.httpResponse.Header.Get("Content-Type")),
  121. )
  122. reader := r.httpResponse.Body
  123. switch contentEncoding {
  124. case "br":
  125. reader = NewBrotliReadCloser(r.httpResponse.Body)
  126. case "gzip":
  127. reader = NewGzipReadCloser(r.httpResponse.Body)
  128. }
  129. return http.MaxBytesReader(nil, reader, maxBodySize)
  130. }
  131. func (r *ResponseHandler) Body(maxBodySize int64) io.ReadCloser {
  132. return r.getReader(maxBodySize)
  133. }
  134. func (r *ResponseHandler) ReadBody(maxBodySize int64) ([]byte, *locale.LocalizedErrorWrapper) {
  135. limitedReader := r.getReader(maxBodySize)
  136. buffer, err := io.ReadAll(limitedReader)
  137. if err != nil && err != io.EOF {
  138. if err, ok := err.(*http.MaxBytesError); ok {
  139. return nil, locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: response body too large: %d bytes", err.Limit), "error.http_response_too_large")
  140. }
  141. return nil, locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: unable to read response body: %w", err), "error.http_body_read", err)
  142. }
  143. if len(buffer) == 0 {
  144. return nil, locale.NewLocalizedErrorWrapper(errors.New("fetcher: empty response body"), "error.http_empty_response_body")
  145. }
  146. return buffer, nil
  147. }
  148. func (r *ResponseHandler) LocalizedError() *locale.LocalizedErrorWrapper {
  149. if r.clientErr != nil {
  150. switch {
  151. case isSSLError(r.clientErr):
  152. return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: %w", r.clientErr), "error.tls_error", r.clientErr)
  153. case isNetworkError(r.clientErr):
  154. return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: %w", r.clientErr), "error.network_operation", r.clientErr)
  155. case os.IsTimeout(r.clientErr):
  156. return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: %w", r.clientErr), "error.network_timeout", r.clientErr)
  157. case errors.Is(r.clientErr, io.EOF):
  158. return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: %w", r.clientErr), "error.http_empty_response")
  159. default:
  160. return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: %w", r.clientErr), "error.http_client_error", r.clientErr)
  161. }
  162. }
  163. switch r.httpResponse.StatusCode {
  164. case http.StatusUnauthorized:
  165. return locale.NewLocalizedErrorWrapper(errors.New("fetcher: access unauthorized (401 status code)"), "error.http_not_authorized")
  166. case http.StatusForbidden:
  167. return locale.NewLocalizedErrorWrapper(errors.New("fetcher: access forbidden (403 status code)"), "error.http_forbidden")
  168. case http.StatusTooManyRequests:
  169. return locale.NewLocalizedErrorWrapper(errors.New("fetcher: too many requests (429 status code)"), "error.http_too_many_requests")
  170. case http.StatusNotFound, http.StatusGone:
  171. return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: resource not found (%d status code)", r.httpResponse.StatusCode), "error.http_resource_not_found")
  172. case http.StatusInternalServerError:
  173. return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: remote server error (%d status code)", r.httpResponse.StatusCode), "error.http_internal_server_error")
  174. case http.StatusBadGateway:
  175. return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: bad gateway (%d status code)", r.httpResponse.StatusCode), "error.http_bad_gateway")
  176. case http.StatusServiceUnavailable:
  177. return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: service unavailable (%d status code)", r.httpResponse.StatusCode), "error.http_service_unavailable")
  178. case http.StatusGatewayTimeout:
  179. return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: gateway timeout (%d status code)", r.httpResponse.StatusCode), "error.http_gateway_timeout")
  180. }
  181. if r.httpResponse.StatusCode >= 400 {
  182. return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: unexpected status code (%d status code)", r.httpResponse.StatusCode), "error.http_unexpected_status_code", r.httpResponse.StatusCode)
  183. }
  184. if r.httpResponse.StatusCode != 304 {
  185. // Content-Length = -1 when no Content-Length header is sent.
  186. if r.httpResponse.ContentLength == 0 {
  187. return locale.NewLocalizedErrorWrapper(errors.New("fetcher: empty response body"), "error.http_empty_response_body")
  188. }
  189. }
  190. return nil
  191. }
  // isNetworkError reports whether err is, or wraps, a *url.Error, io.EOF or
  // a *net.OpError. It returns false for a nil error.
  //
  // All three checks walk the error chain, so an error wrapped with %w is
  // recognised the same way as a bare one.
  func isNetworkError(err error) bool {
  	var urlErr *url.Error
  	if errors.As(err, &urlErr) {
  		return true
  	}

  	if errors.Is(err, io.EOF) {
  		return true
  	}

  	var opErr *net.OpError
  	return errors.As(err, &opErr)
  }
  // isSSLError reports whether err is, or wraps, one of the certificate
  // verification errors from crypto/x509: unknown authority, hostname
  // mismatch, insecure signature algorithm, or an invalid certificate
  // (which covers expired and not-yet-valid certificates). It returns false
  // for a nil error.
  func isSSLError(err error) bool {
  	var certErr x509.UnknownAuthorityError
  	if errors.As(err, &certErr) {
  		return true
  	}

  	var hostErr x509.HostnameError
  	if errors.As(err, &hostErr) {
  		return true
  	}

  	var invalidErr x509.CertificateInvalidError
  	if errors.As(err, &invalidErr) {
  		return true
  	}

  	var algErr x509.InsecureAlgorithmError
  	return errors.As(err, &algErr)
  }
  214. var algErr x509.InsecureAlgorithmError
  215. return errors.As(err, &algErr)
  216. }