atom_03.go 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. // Copyright 2019 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package atom // import "miniflux.app/reader/atom"
  5. import (
  6. "encoding/base64"
  7. "html"
  8. "strings"
  9. "time"
  10. "miniflux.app/crypto"
  11. "miniflux.app/logger"
  12. "miniflux.app/model"
  13. "miniflux.app/reader/date"
  14. "miniflux.app/reader/sanitizer"
  15. "miniflux.app/url"
  16. )
  17. // Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html
  18. type atom03Feed struct {
  19. ID string `xml:"id"`
  20. Title atom03Text `xml:"title"`
  21. Author atomPerson `xml:"author"`
  22. Links atomLinks `xml:"link"`
  23. Entries []atom03Entry `xml:"entry"`
  24. }
  25. func (a *atom03Feed) Transform(baseURL string) *model.Feed {
  26. var err error
  27. feed := new(model.Feed)
  28. feedURL := a.Links.firstLinkWithRelation("self")
  29. feed.FeedURL, err = url.AbsoluteURL(baseURL, feedURL)
  30. if err != nil {
  31. feed.FeedURL = feedURL
  32. }
  33. siteURL := a.Links.originalLink()
  34. feed.SiteURL, err = url.AbsoluteURL(baseURL, siteURL)
  35. if err != nil {
  36. feed.SiteURL = siteURL
  37. }
  38. feed.Title = a.Title.String()
  39. if feed.Title == "" {
  40. feed.Title = feed.SiteURL
  41. }
  42. for _, entry := range a.Entries {
  43. item := entry.Transform()
  44. entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL)
  45. if err == nil {
  46. item.URL = entryURL
  47. }
  48. if item.Author == "" {
  49. item.Author = a.Author.String()
  50. }
  51. if item.Title == "" {
  52. item.Title = item.URL
  53. }
  54. feed.Entries = append(feed.Entries, item)
  55. }
  56. return feed
  57. }
  58. type atom03Entry struct {
  59. ID string `xml:"id"`
  60. Title atom03Text `xml:"title"`
  61. Modified string `xml:"modified"`
  62. Issued string `xml:"issued"`
  63. Created string `xml:"created"`
  64. Links atomLinks `xml:"link"`
  65. Summary atom03Text `xml:"summary"`
  66. Content atom03Text `xml:"content"`
  67. Author atomPerson `xml:"author"`
  68. }
  69. func (a *atom03Entry) Transform() *model.Entry {
  70. entry := new(model.Entry)
  71. entry.URL = a.Links.originalLink()
  72. entry.Date = a.entryDate()
  73. entry.Author = a.Author.String()
  74. entry.Hash = a.entryHash()
  75. entry.Content = a.entryContent()
  76. entry.Title = a.entryTitle()
  77. return entry
  78. }
  79. func (a *atom03Entry) entryTitle() string {
  80. return sanitizer.StripTags(a.Title.String())
  81. }
  82. func (a *atom03Entry) entryContent() string {
  83. content := a.Content.String()
  84. if content != "" {
  85. return content
  86. }
  87. summary := a.Summary.String()
  88. if summary != "" {
  89. return summary
  90. }
  91. return ""
  92. }
  93. func (a *atom03Entry) entryDate() time.Time {
  94. dateText := ""
  95. for _, value := range []string{a.Issued, a.Modified, a.Created} {
  96. if value != "" {
  97. dateText = value
  98. break
  99. }
  100. }
  101. if dateText != "" {
  102. result, err := date.Parse(dateText)
  103. if err != nil {
  104. logger.Error("atom: %v", err)
  105. return time.Now()
  106. }
  107. return result
  108. }
  109. return time.Now()
  110. }
  111. func (a *atom03Entry) entryHash() string {
  112. for _, value := range []string{a.ID, a.Links.originalLink()} {
  113. if value != "" {
  114. return crypto.Hash(value)
  115. }
  116. }
  117. return ""
  118. }
  119. type atom03Text struct {
  120. Type string `xml:"type,attr"`
  121. Mode string `xml:"mode,attr"`
  122. CharData string `xml:",chardata"`
  123. InnerXML string `xml:",innerxml"`
  124. }
  125. func (a *atom03Text) String() string {
  126. content := ""
  127. switch {
  128. case a.Mode == "xml":
  129. content = a.InnerXML
  130. case a.Mode == "escaped":
  131. content = a.CharData
  132. case a.Mode == "base64":
  133. b, err := base64.StdEncoding.DecodeString(a.CharData)
  134. if err == nil {
  135. content = string(b)
  136. }
  137. default:
  138. content = a.CharData
  139. }
  140. if a.Type != "text/html" {
  141. content = html.EscapeString(content)
  142. }
  143. return strings.TrimSpace(content)
  144. }