rdf.go 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package rdf // import "miniflux.app/reader/rdf"
  5. import (
  6. "encoding/xml"
  7. "html"
  8. "strings"
  9. "time"
  10. "miniflux.app/crypto"
  11. "miniflux.app/logger"
  12. "miniflux.app/model"
  13. "miniflux.app/reader/date"
  14. "miniflux.app/reader/sanitizer"
  15. "miniflux.app/url"
  16. )
  17. type rdfFeed struct {
  18. XMLName xml.Name `xml:"RDF"`
  19. Title string `xml:"channel>title"`
  20. Link string `xml:"channel>link"`
  21. Items []rdfItem `xml:"item"`
  22. DublinCoreFeedElement
  23. }
  24. func (r *rdfFeed) Transform(baseURL string) *model.Feed {
  25. var err error
  26. feed := new(model.Feed)
  27. feed.Title = sanitizer.StripTags(r.Title)
  28. feed.FeedURL = baseURL
  29. feed.SiteURL, err = url.AbsoluteURL(baseURL, r.Link)
  30. if err != nil {
  31. feed.SiteURL = r.Link
  32. }
  33. for _, item := range r.Items {
  34. entry := item.Transform()
  35. if entry.Author == "" && r.DublinCoreCreator != "" {
  36. entry.Author = strings.TrimSpace(r.DublinCoreCreator)
  37. }
  38. if entry.URL == "" {
  39. entry.URL = feed.SiteURL
  40. } else {
  41. entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL)
  42. if err == nil {
  43. entry.URL = entryURL
  44. }
  45. }
  46. feed.Entries = append(feed.Entries, entry)
  47. }
  48. return feed
  49. }
  50. type rdfItem struct {
  51. Title string `xml:"title"`
  52. Link string `xml:"link"`
  53. Description string `xml:"description"`
  54. DublinCoreEntryElement
  55. }
  56. func (r *rdfItem) Transform() *model.Entry {
  57. entry := new(model.Entry)
  58. entry.Title = r.entryTitle()
  59. entry.Author = r.entryAuthor()
  60. entry.URL = r.entryURL()
  61. entry.Content = r.entryContent()
  62. entry.Hash = r.entryHash()
  63. entry.Date = r.entryDate()
  64. return entry
  65. }
  66. func (r *rdfItem) entryTitle() string {
  67. return html.UnescapeString(strings.TrimSpace(r.Title))
  68. }
  69. func (r *rdfItem) entryContent() string {
  70. switch {
  71. case r.DublinCoreContent != "":
  72. return r.DublinCoreContent
  73. default:
  74. return r.Description
  75. }
  76. }
  77. func (r *rdfItem) entryAuthor() string {
  78. return strings.TrimSpace(r.DublinCoreCreator)
  79. }
  80. func (r *rdfItem) entryURL() string {
  81. return strings.TrimSpace(r.Link)
  82. }
  83. func (r *rdfItem) entryDate() time.Time {
  84. if r.DublinCoreDate != "" {
  85. result, err := date.Parse(r.DublinCoreDate)
  86. if err != nil {
  87. logger.Error("rdf: %v (entry link = %s)", err, r.Link)
  88. return time.Now()
  89. }
  90. return result
  91. }
  92. return time.Now()
  93. }
  94. func (r *rdfItem) entryHash() string {
  95. value := r.Link
  96. if value == "" {
  97. value = r.Title + r.Description
  98. }
  99. return crypto.Hash(value)
  100. }