parser.go 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package feed
  5. import (
  6. "bytes"
  7. "encoding/xml"
  8. "errors"
  9. "io"
  10. "strings"
  11. "time"
  12. "github.com/miniflux/miniflux/model"
  13. "github.com/miniflux/miniflux/reader/atom"
  14. "github.com/miniflux/miniflux/reader/encoding"
  15. "github.com/miniflux/miniflux/reader/json"
  16. "github.com/miniflux/miniflux/reader/rdf"
  17. "github.com/miniflux/miniflux/reader/rss"
  18. "github.com/miniflux/miniflux/timer"
  19. )
  20. // List of feed formats.
  21. const (
  22. FormatRDF = "rdf"
  23. FormatRSS = "rss"
  24. FormatAtom = "atom"
  25. FormatJSON = "json"
  26. FormatUnknown = "unknown"
  27. )
  28. // DetectFeedFormat detect feed format from input data.
  29. func DetectFeedFormat(r io.Reader) string {
  30. defer timer.ExecutionTime(time.Now(), "[Feed:DetectFeedFormat]")
  31. var buffer bytes.Buffer
  32. tee := io.TeeReader(r, &buffer)
  33. decoder := xml.NewDecoder(tee)
  34. decoder.CharsetReader = encoding.CharsetReader
  35. for {
  36. token, _ := decoder.Token()
  37. if token == nil {
  38. break
  39. }
  40. if element, ok := token.(xml.StartElement); ok {
  41. switch element.Name.Local {
  42. case "rss":
  43. return FormatRSS
  44. case "feed":
  45. return FormatAtom
  46. case "RDF":
  47. return FormatRDF
  48. }
  49. }
  50. }
  51. if strings.HasPrefix(strings.TrimSpace(buffer.String()), "{") {
  52. return FormatJSON
  53. }
  54. return FormatUnknown
  55. }
  56. func parseFeed(r io.Reader) (*model.Feed, error) {
  57. defer timer.ExecutionTime(time.Now(), "[Feed:ParseFeed]")
  58. var buffer bytes.Buffer
  59. io.Copy(&buffer, r)
  60. reader := bytes.NewReader(buffer.Bytes())
  61. format := DetectFeedFormat(reader)
  62. reader.Seek(0, io.SeekStart)
  63. switch format {
  64. case FormatAtom:
  65. return atom.Parse(reader)
  66. case FormatRSS:
  67. return rss.Parse(reader)
  68. case FormatJSON:
  69. return json.Parse(reader)
  70. case FormatRDF:
  71. return rdf.Parse(reader)
  72. default:
  73. return nil, errors.New("Unsupported feed format")
  74. }
  75. }