response.go 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package client // import "miniflux.app/http/client"
  5. import (
  6. "bytes"
  7. "io"
  8. "io/ioutil"
  9. "regexp"
  10. "strings"
  11. "unicode/utf8"
  12. "golang.org/x/net/html/charset"
  13. )
  // xmlEncodingRegex matches an XML declaration that carries an encoding
  // attribute, quoted with either single or double quotes, for example
  // <?xml version="1.0" encoding="ISO-8859-1"?>.
  var xmlEncodingRegex = regexp.MustCompile(
  	`<\?xml(.*)encoding=["'](.+)["'](.*)\?>`,
  )
  // Response wraps a server response.
  type Response struct {
  	// Body is the response payload. EnsureUnicodeBody may replace it with a
  	// decoding reader, and String consumes it.
  	Body io.Reader

  	// StatusCode is the HTTP status code of the response.
  	StatusCode int

  	// EffectiveURL is presumably the URL the response finally came from
  	// (after redirects) — not used in this file, confirm against the client.
  	EffectiveURL string

  	// LastModified is compared verbatim with the caller's cached value in
  	// IsModified.
  	LastModified string

  	// ETag is compared verbatim with the caller's cached value in IsModified.
  	ETag string

  	// ContentType is inspected for "json" and "xml" and is handed to the
  	// charset detection in EnsureUnicodeBody.
  	ContentType string

  	// ContentLength is presumably the declared body size in bytes — not used
  	// in this file, confirm against the client.
  	ContentLength int64
  }
  25. // IsNotFound returns true if the resource doesn't exists anymore.
  26. func (r *Response) IsNotFound() bool {
  27. return r.StatusCode == 404 || r.StatusCode == 410
  28. }
  29. // IsNotAuthorized returns true if the resource require authentication.
  30. func (r *Response) IsNotAuthorized() bool {
  31. return r.StatusCode == 401
  32. }
  33. // HasServerFailure returns true if the status code represents a failure.
  34. func (r *Response) HasServerFailure() bool {
  35. return r.StatusCode >= 400
  36. }
  37. // IsModified returns true if the resource has been modified.
  38. func (r *Response) IsModified(etag, lastModified string) bool {
  39. if r.StatusCode == 304 {
  40. return false
  41. }
  42. if r.ETag != "" && r.ETag == etag {
  43. return false
  44. }
  45. if r.LastModified != "" && r.LastModified == lastModified {
  46. return false
  47. }
  48. return true
  49. }
  50. // EnsureUnicodeBody makes sure the body is encoded in UTF-8.
  51. //
  52. // If a charset other than UTF-8 is detected, we convert the document to UTF-8.
  53. // This is used by the scraper and feed readers.
  54. //
  55. // Do not forget edge cases:
  56. //
  57. // - Feeds with encoding specified only in Content-Type header and not in XML document
  58. // - Feeds with encoding specified in both places
  59. // - Feeds with encoding specified only in XML document and not in HTTP header
  60. // - Feeds with wrong encoding defined and already in UTF-8
  61. func (r *Response) EnsureUnicodeBody() (err error) {
  62. if r.ContentType != "" {
  63. // JSON feeds are always in UTF-8.
  64. if strings.Contains(r.ContentType, "json") {
  65. return
  66. }
  67. if strings.Contains(r.ContentType, "xml") {
  68. buffer, _ := ioutil.ReadAll(r.Body)
  69. r.Body = bytes.NewReader(buffer)
  70. // We ignore documents with encoding specified in XML prolog.
  71. // This is going to be handled by the XML parser.
  72. length := 1024
  73. if len(buffer) < 1024 {
  74. length = len(buffer)
  75. }
  76. if xmlEncodingRegex.Match(buffer[0:length]) {
  77. return
  78. }
  79. // If no encoding is specified in the XML prolog and
  80. // the document is valid UTF-8, nothing needs to be done.
  81. if utf8.Valid(buffer) {
  82. return
  83. }
  84. }
  85. }
  86. r.Body, err = charset.NewReader(r.Body, r.ContentType)
  87. return err
  88. }
  89. // String returns the response body as string.
  90. func (r *Response) String() string {
  91. bytes, _ := ioutil.ReadAll(r.Body)
  92. return string(bytes)
  93. }