parser.go 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package feed
  5. import (
  6. "bytes"
  7. "encoding/xml"
  8. "errors"
  9. "github.com/miniflux/miniflux2/helper"
  10. "github.com/miniflux/miniflux2/model"
  11. "github.com/miniflux/miniflux2/reader/feed/atom"
  12. "github.com/miniflux/miniflux2/reader/feed/json"
  13. "github.com/miniflux/miniflux2/reader/feed/rss"
  14. "io"
  15. "strings"
  16. "time"
  17. "golang.org/x/net/html/charset"
  18. )
  19. const (
  20. FormatRss = "rss"
  21. FormatAtom = "atom"
  22. FormatJson = "json"
  23. FormatUnknown = "unknown"
  24. )
  25. func DetectFeedFormat(data io.Reader) string {
  26. defer helper.ExecutionTime(time.Now(), "[Feed:DetectFeedFormat]")
  27. var buffer bytes.Buffer
  28. tee := io.TeeReader(data, &buffer)
  29. decoder := xml.NewDecoder(tee)
  30. decoder.CharsetReader = charset.NewReaderLabel
  31. for {
  32. token, _ := decoder.Token()
  33. if token == nil {
  34. break
  35. }
  36. if element, ok := token.(xml.StartElement); ok {
  37. switch element.Name.Local {
  38. case "rss":
  39. return FormatRss
  40. case "feed":
  41. return FormatAtom
  42. }
  43. }
  44. }
  45. if strings.HasPrefix(strings.TrimSpace(buffer.String()), "{") {
  46. return FormatJson
  47. }
  48. return FormatUnknown
  49. }
  50. func parseFeed(data io.Reader) (*model.Feed, error) {
  51. defer helper.ExecutionTime(time.Now(), "[Feed:ParseFeed]")
  52. var buffer bytes.Buffer
  53. io.Copy(&buffer, data)
  54. reader := bytes.NewReader(buffer.Bytes())
  55. format := DetectFeedFormat(reader)
  56. reader.Seek(0, io.SeekStart)
  57. switch format {
  58. case FormatAtom:
  59. return atom.Parse(reader)
  60. case FormatRss:
  61. return rss.Parse(reader)
  62. case FormatJson:
  63. return json.Parse(reader)
  64. default:
  65. return nil, errors.New("Unsupported feed format")
  66. }
  67. }