response.go 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package client // import "miniflux.app/http/client"
  5. import (
  6. "bytes"
  7. "fmt"
  8. "io"
  9. "io/ioutil"
  10. "regexp"
  11. "strings"
  12. "unicode/utf8"
  13. "golang.org/x/net/html/charset"
  14. )
  15. var xmlEncodingRegex = regexp.MustCompile(`<\?xml(.*)encoding=["'](.+)["'](.*)\?>`)
  16. // Response wraps a server response.
  17. type Response struct {
  18. Body io.Reader
  19. StatusCode int
  20. EffectiveURL string
  21. LastModified string
  22. ETag string
  23. Expires string
  24. ContentType string
  25. ContentLength int64
  26. }
  27. func (r *Response) String() string {
  28. return fmt.Sprintf(
  29. `StatusCode=%d EffectiveURL=%q LastModified=%q ETag=%s Expires=%s ContentType=%q ContentLength=%d`,
  30. r.StatusCode,
  31. r.EffectiveURL,
  32. r.LastModified,
  33. r.ETag,
  34. r.Expires,
  35. r.ContentType,
  36. r.ContentLength,
  37. )
  38. }
  39. // IsNotFound returns true if the resource doesn't exists anymore.
  40. func (r *Response) IsNotFound() bool {
  41. return r.StatusCode == 404 || r.StatusCode == 410
  42. }
  43. // IsNotAuthorized returns true if the resource require authentication.
  44. func (r *Response) IsNotAuthorized() bool {
  45. return r.StatusCode == 401
  46. }
  47. // HasServerFailure returns true if the status code represents a failure.
  48. func (r *Response) HasServerFailure() bool {
  49. return r.StatusCode >= 400
  50. }
  51. // IsModified returns true if the resource has been modified.
  52. func (r *Response) IsModified(etag, lastModified string) bool {
  53. if r.StatusCode == 304 {
  54. return false
  55. }
  56. if r.ETag != "" && r.ETag == etag {
  57. return false
  58. }
  59. if r.LastModified != "" && r.LastModified == lastModified {
  60. return false
  61. }
  62. return true
  63. }
  64. // EnsureUnicodeBody makes sure the body is encoded in UTF-8.
  65. //
  66. // If a charset other than UTF-8 is detected, we convert the document to UTF-8.
  67. // This is used by the scraper and feed readers.
  68. //
  69. // Do not forget edge cases:
  70. //
  71. // - Feeds with encoding specified only in Content-Type header and not in XML document
  72. // - Feeds with encoding specified in both places
  73. // - Feeds with encoding specified only in XML document and not in HTTP header
  74. // - Feeds with wrong encoding defined and already in UTF-8
  75. func (r *Response) EnsureUnicodeBody() (err error) {
  76. if r.ContentType != "" {
  77. // JSON feeds are always in UTF-8.
  78. if strings.Contains(r.ContentType, "json") {
  79. return
  80. }
  81. if strings.Contains(r.ContentType, "xml") {
  82. buffer, _ := ioutil.ReadAll(r.Body)
  83. r.Body = bytes.NewReader(buffer)
  84. // We ignore documents with encoding specified in XML prolog.
  85. // This is going to be handled by the XML parser.
  86. length := 1024
  87. if len(buffer) < 1024 {
  88. length = len(buffer)
  89. }
  90. if xmlEncodingRegex.Match(buffer[0:length]) {
  91. return
  92. }
  93. // If no encoding is specified in the XML prolog and
  94. // the document is valid UTF-8, nothing needs to be done.
  95. if utf8.Valid(buffer) {
  96. return
  97. }
  98. }
  99. }
  100. r.Body, err = charset.NewReader(r.Body, r.ContentType)
  101. return err
  102. }
  103. // BodyAsString returns the response body as string.
  104. func (r *Response) BodyAsString() string {
  105. bytes, _ := ioutil.ReadAll(r.Body)
  106. return string(bytes)
  107. }