response.go 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package client // import "miniflux.app/http/client"
  4. import (
  5. "bytes"
  6. "fmt"
  7. "io"
  8. "regexp"
  9. "strings"
  10. "unicode/utf8"
  11. "golang.org/x/net/html/charset"
  12. )
  13. var xmlEncodingRegex = regexp.MustCompile(`<\?xml(.*)encoding=["'](.+)["'](.*)\?>`)
  14. // Response wraps a server response.
  15. type Response struct {
  16. Body io.Reader
  17. StatusCode int
  18. EffectiveURL string
  19. LastModified string
  20. ETag string
  21. Expires string
  22. ContentType string
  23. ContentLength int64
  24. }
  25. func (r *Response) String() string {
  26. return fmt.Sprintf(
  27. `StatusCode=%d EffectiveURL=%q LastModified=%q ETag=%s Expires=%s ContentType=%q ContentLength=%d`,
  28. r.StatusCode,
  29. r.EffectiveURL,
  30. r.LastModified,
  31. r.ETag,
  32. r.Expires,
  33. r.ContentType,
  34. r.ContentLength,
  35. )
  36. }
  37. // IsNotFound returns true if the resource doesn't exist anymore.
  38. func (r *Response) IsNotFound() bool {
  39. return r.StatusCode == 404 || r.StatusCode == 410
  40. }
  41. // IsNotAuthorized returns true if the resource require authentication.
  42. func (r *Response) IsNotAuthorized() bool {
  43. return r.StatusCode == 401
  44. }
  45. // HasServerFailure returns true if the status code represents a failure.
  46. func (r *Response) HasServerFailure() bool {
  47. return r.StatusCode >= 400
  48. }
  49. // IsModified returns true if the resource has been modified.
  50. func (r *Response) IsModified(etag, lastModified string) bool {
  51. if r.StatusCode == 304 {
  52. return false
  53. }
  54. if r.ETag != "" && r.ETag == etag {
  55. return false
  56. }
  57. if r.LastModified != "" && r.LastModified == lastModified {
  58. return false
  59. }
  60. return true
  61. }
  62. // EnsureUnicodeBody makes sure the body is encoded in UTF-8.
  63. //
  64. // If a charset other than UTF-8 is detected, we convert the document to UTF-8.
  65. // This is used by the scraper and feed readers.
  66. //
  67. // Do not forget edge cases:
  68. //
  69. // - Feeds with encoding specified only in Content-Type header and not in XML document
  70. // - Feeds with encoding specified in both places
  71. // - Feeds with encoding specified only in XML document and not in HTTP header
  72. // - Feeds with wrong encoding defined and already in UTF-8
  73. func (r *Response) EnsureUnicodeBody() (err error) {
  74. buffer, err := io.ReadAll(r.Body)
  75. if err != nil {
  76. return err
  77. }
  78. r.Body = bytes.NewReader(buffer)
  79. if utf8.Valid(buffer) {
  80. return nil
  81. }
  82. if strings.Contains(r.ContentType, "xml") {
  83. // We ignore documents with encoding specified in XML prolog.
  84. // This is going to be handled by the XML parser.
  85. length := 1024
  86. if len(buffer) < 1024 {
  87. length = len(buffer)
  88. }
  89. if xmlEncodingRegex.Match(buffer[0:length]) {
  90. return nil
  91. }
  92. }
  93. r.Body, err = charset.NewReader(r.Body, r.ContentType)
  94. return err
  95. }
  96. // BodyAsString returns the response body as string.
  97. func (r *Response) BodyAsString() string {
  98. bytes, _ := io.ReadAll(r.Body)
  99. return string(bytes)
  100. }