atom_03.go 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. // Copyright 2019 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package atom // import "miniflux.app/reader/atom"
  5. import (
  6. "encoding/base64"
  7. "html"
  8. "strings"
  9. "time"
  10. "miniflux.app/crypto"
  11. "miniflux.app/logger"
  12. "miniflux.app/model"
  13. "miniflux.app/reader/date"
  14. "miniflux.app/reader/sanitizer"
  15. "miniflux.app/url"
  16. )
  17. // Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html
  18. type atom03Feed struct {
  19. ID string `xml:"id"`
  20. Title atom03Text `xml:"title"`
  21. Author atomPerson `xml:"author"`
  22. Links atomLinks `xml:"link"`
  23. Entries []atom03Entry `xml:"entry"`
  24. }
  25. func (a *atom03Feed) Transform() *model.Feed {
  26. feed := new(model.Feed)
  27. feed.FeedURL = a.Links.firstLinkWithRelation("self")
  28. feed.SiteURL = a.Links.originalLink()
  29. feed.Title = a.Title.String()
  30. if feed.Title == "" {
  31. feed.Title = feed.SiteURL
  32. }
  33. for _, entry := range a.Entries {
  34. item := entry.Transform()
  35. entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL)
  36. if err == nil {
  37. item.URL = entryURL
  38. }
  39. if item.Author == "" {
  40. item.Author = a.Author.String()
  41. }
  42. if item.Title == "" {
  43. item.Title = item.URL
  44. }
  45. feed.Entries = append(feed.Entries, item)
  46. }
  47. return feed
  48. }
  49. type atom03Entry struct {
  50. ID string `xml:"id"`
  51. Title atom03Text `xml:"title"`
  52. Modified string `xml:"modified"`
  53. Issued string `xml:"issued"`
  54. Created string `xml:"created"`
  55. Links atomLinks `xml:"link"`
  56. Summary atom03Text `xml:"summary"`
  57. Content atom03Text `xml:"content"`
  58. Author atomPerson `xml:"author"`
  59. }
  60. func (a *atom03Entry) Transform() *model.Entry {
  61. entry := new(model.Entry)
  62. entry.URL = a.Links.originalLink()
  63. entry.Date = a.entryDate()
  64. entry.Author = a.Author.String()
  65. entry.Hash = a.entryHash()
  66. entry.Content = a.entryContent()
  67. entry.Title = a.entryTitle()
  68. return entry
  69. }
  70. func (a *atom03Entry) entryTitle() string {
  71. return sanitizer.StripTags(a.Title.String())
  72. }
  73. func (a *atom03Entry) entryContent() string {
  74. content := a.Content.String()
  75. if content != "" {
  76. return content
  77. }
  78. summary := a.Summary.String()
  79. if summary != "" {
  80. return summary
  81. }
  82. return ""
  83. }
  84. func (a *atom03Entry) entryDate() time.Time {
  85. dateText := ""
  86. for _, value := range []string{a.Issued, a.Modified, a.Created} {
  87. if value != "" {
  88. dateText = value
  89. break
  90. }
  91. }
  92. if dateText != "" {
  93. result, err := date.Parse(dateText)
  94. if err != nil {
  95. logger.Error("atom: %v", err)
  96. return time.Now()
  97. }
  98. return result
  99. }
  100. return time.Now()
  101. }
  102. func (a *atom03Entry) entryHash() string {
  103. for _, value := range []string{a.ID, a.Links.originalLink()} {
  104. if value != "" {
  105. return crypto.Hash(value)
  106. }
  107. }
  108. return ""
  109. }
  // atom03Text is an Atom 0.3 content construct: an element whose payload is
  // interpreted according to its "type" and "mode" attributes.
  type atom03Text struct {
  	// Type is the value of the element's "type" attribute.
  	Type string `xml:"type,attr"`
  	// Mode is the value of the element's "mode" attribute; String
  	// recognises "xml", "escaped" and "base64".
  	Mode string `xml:"mode,attr"`
  	// Data is the element's character data.
  	Data string `xml:",chardata"`
  	// XML is the element's raw inner XML.
  	XML string `xml:",innerxml"`
  }

  // String returns the text of the construct with surrounding whitespace
  // removed.
  //
  // The payload is chosen by Mode:
  //   - "xml": the raw inner XML;
  //   - "base64": the character data decoded from standard base64, or the
  //     empty string when it cannot be decoded;
  //   - anything else (including "escaped" and no mode): the character data.
  //
  // Unless Type is exactly "text/html", the payload is HTML-escaped.
  func (a *atom03Text) String() string {
  	content := ""

  	switch a.Mode {
  	case "xml":
  		content = a.XML
  	case "base64":
  		// The decoder only skips \r and \n, while payloads in
  		// pretty-printed XML are usually indented with spaces or tabs.
  		// Drop all whitespace first so such payloads decode.
  		encoded := strings.Join(strings.Fields(a.Data), "")
  		if decoded, err := base64.StdEncoding.DecodeString(encoded); err == nil {
  			content = string(decoded)
  		}
  		// Undecodable input deliberately yields empty content.
  	default:
  		// "escaped" and a missing/unknown mode both use the character data.
  		content = a.Data
  	}

  	if a.Type != "text/html" {
  		content = html.EscapeString(content)
  	}

  	return strings.TrimSpace(content)
  }