encoding.go 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. // Copyright 2018 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package encoding // import "miniflux.app/reader/encoding"
  5. import (
  6. "bytes"
  7. "io"
  8. "io/ioutil"
  9. "unicode/utf8"
  10. "golang.org/x/net/html/charset"
  11. )
  12. // CharsetReader is used when the XML encoding is specified for the input document.
  13. //
  14. // The document is converted in UTF-8 only if a different encoding is specified
  15. // and the document is not already UTF-8.
  16. //
  17. // Several edge cases could exists:
  18. //
  19. // - Feeds with encoding specified only in Content-Type header and not in XML document
  20. // - Feeds with encoding specified in both places
  21. // - Feeds with encoding specified only in XML document and not in HTTP header
  22. // - Feeds with wrong encoding defined and already in UTF-8
  23. func CharsetReader(label string, input io.Reader) (io.Reader, error) {
  24. buffer, _ := ioutil.ReadAll(input)
  25. r := bytes.NewReader(buffer)
  26. // The document is already UTF-8, do not do anything (avoid double-encoding).
  27. // That means the specified encoding in XML prolog is wrong.
  28. if utf8.Valid(buffer) {
  29. return r, nil
  30. }
  31. // Transform document to UTF-8 from the specified encoding in XML prolog.
  32. return charset.NewReaderLabel(label, r)
  33. }