parser.go 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package feed
  5. import (
  6. "bytes"
  7. "encoding/xml"
  8. "errors"
  9. "io"
  10. "strings"
  11. "time"
  12. "github.com/miniflux/miniflux/model"
  13. "github.com/miniflux/miniflux/reader/atom"
  14. "github.com/miniflux/miniflux/reader/json"
  15. "github.com/miniflux/miniflux/reader/rdf"
  16. "github.com/miniflux/miniflux/reader/rss"
  17. "github.com/miniflux/miniflux/timer"
  18. "golang.org/x/net/html/charset"
  19. )
  20. // List of feed formats.
  21. const (
  22. FormatRDF = "rdf"
  23. FormatRSS = "rss"
  24. FormatAtom = "atom"
  25. FormatJSON = "json"
  26. FormatUnknown = "unknown"
  27. )
  28. // DetectFeedFormat detect feed format from input data.
  29. func DetectFeedFormat(data io.Reader) string {
  30. defer timer.ExecutionTime(time.Now(), "[Feed:DetectFeedFormat]")
  31. var buffer bytes.Buffer
  32. tee := io.TeeReader(data, &buffer)
  33. decoder := xml.NewDecoder(tee)
  34. decoder.CharsetReader = charset.NewReaderLabel
  35. for {
  36. token, _ := decoder.Token()
  37. if token == nil {
  38. break
  39. }
  40. if element, ok := token.(xml.StartElement); ok {
  41. switch element.Name.Local {
  42. case "rss":
  43. return FormatRSS
  44. case "feed":
  45. return FormatAtom
  46. case "RDF":
  47. return FormatRDF
  48. }
  49. }
  50. }
  51. if strings.HasPrefix(strings.TrimSpace(buffer.String()), "{") {
  52. return FormatJSON
  53. }
  54. return FormatUnknown
  55. }
  56. func parseFeed(data io.Reader) (*model.Feed, error) {
  57. defer timer.ExecutionTime(time.Now(), "[Feed:ParseFeed]")
  58. var buffer bytes.Buffer
  59. io.Copy(&buffer, data)
  60. reader := bytes.NewReader(buffer.Bytes())
  61. format := DetectFeedFormat(reader)
  62. reader.Seek(0, io.SeekStart)
  63. switch format {
  64. case FormatAtom:
  65. return atom.Parse(reader)
  66. case FormatRSS:
  67. return rss.Parse(reader)
  68. case FormatJSON:
  69. return json.Parse(reader)
  70. case FormatRDF:
  71. return rdf.Parse(reader)
  72. default:
  73. return nil, errors.New("Unsupported feed format")
  74. }
  75. }