4
0

atom_03.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. // Copyright 2019 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package atom // import "miniflux.app/reader/atom"
  5. import (
  6. "encoding/base64"
  7. "html"
  8. "strings"
  9. "time"
  10. "miniflux.app/crypto"
  11. "miniflux.app/logger"
  12. "miniflux.app/model"
  13. "miniflux.app/reader/date"
  14. "miniflux.app/reader/sanitizer"
  15. "miniflux.app/url"
  16. )
  17. // Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html
  18. type atom03Feed struct {
  19. ID string `xml:"id"`
  20. Title atom03Text `xml:"title"`
  21. Author atomPerson `xml:"author"`
  22. Links atomLinks `xml:"link"`
  23. Entries []atom03Entry `xml:"entry"`
  24. }
  25. func (a *atom03Feed) Transform(baseURL string) *model.Feed {
  26. var err error
  27. feed := new(model.Feed)
  28. feedURL := a.Links.firstLinkWithRelation("self")
  29. feed.FeedURL, err = url.AbsoluteURL(baseURL, feedURL)
  30. if err != nil {
  31. feed.FeedURL = feedURL
  32. }
  33. siteURL := a.Links.originalLink()
  34. feed.SiteURL, err = url.AbsoluteURL(baseURL, siteURL)
  35. if err != nil {
  36. feed.SiteURL = siteURL
  37. }
  38. feed.Title = a.Title.String()
  39. if feed.Title == "" {
  40. feed.Title = feed.SiteURL
  41. }
  42. for _, entry := range a.Entries {
  43. item := entry.Transform()
  44. entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL)
  45. if err == nil {
  46. item.URL = entryURL
  47. }
  48. if item.Author == "" {
  49. item.Author = a.Author.String()
  50. }
  51. if item.Title == "" {
  52. item.Title = sanitizer.TruncateHTML(item.Content, 100)
  53. }
  54. if item.Title == "" {
  55. item.Title = item.URL
  56. }
  57. feed.Entries = append(feed.Entries, item)
  58. }
  59. return feed
  60. }
  61. type atom03Entry struct {
  62. ID string `xml:"id"`
  63. Title atom03Text `xml:"title"`
  64. Modified string `xml:"modified"`
  65. Issued string `xml:"issued"`
  66. Created string `xml:"created"`
  67. Links atomLinks `xml:"link"`
  68. Summary atom03Text `xml:"summary"`
  69. Content atom03Text `xml:"content"`
  70. Author atomPerson `xml:"author"`
  71. }
  72. func (a *atom03Entry) Transform() *model.Entry {
  73. entry := new(model.Entry)
  74. entry.URL = a.Links.originalLink()
  75. entry.Date = a.entryDate()
  76. entry.Author = a.Author.String()
  77. entry.Hash = a.entryHash()
  78. entry.Content = a.entryContent()
  79. entry.Title = a.entryTitle()
  80. return entry
  81. }
  82. func (a *atom03Entry) entryTitle() string {
  83. return sanitizer.StripTags(a.Title.String())
  84. }
  85. func (a *atom03Entry) entryContent() string {
  86. content := a.Content.String()
  87. if content != "" {
  88. return content
  89. }
  90. summary := a.Summary.String()
  91. if summary != "" {
  92. return summary
  93. }
  94. return ""
  95. }
  96. func (a *atom03Entry) entryDate() time.Time {
  97. dateText := ""
  98. for _, value := range []string{a.Issued, a.Modified, a.Created} {
  99. if value != "" {
  100. dateText = value
  101. break
  102. }
  103. }
  104. if dateText != "" {
  105. result, err := date.Parse(dateText)
  106. if err != nil {
  107. logger.Error("atom: %v", err)
  108. return time.Now()
  109. }
  110. return result
  111. }
  112. return time.Now()
  113. }
  114. func (a *atom03Entry) entryHash() string {
  115. for _, value := range []string{a.ID, a.Links.originalLink()} {
  116. if value != "" {
  117. return crypto.Hash(value)
  118. }
  119. }
  120. return ""
  121. }
  // atom03Text is the decoding target for an Atom 0.3 text element such as
  // <title>, <summary> or <content>.
  type atom03Text struct {
  	// Type is the value of the "type" attribute (for example "text/html").
  	Type string `xml:"type,attr"`

  	// Mode is the value of the "mode" attribute; String recognises "xml"
  	// and "base64" and treats everything else as character data.
  	Mode string `xml:"mode,attr"`

  	// CharData is the character data of the element.
  	CharData string `xml:",chardata"`

  	// InnerXML is the raw, unparsed XML nested inside the element.
  	InnerXML string `xml:",innerxml"`
  }

  // String returns the textual value of the element.
  //
  // The payload is selected according to Mode:
  //   - "xml": the raw inner XML;
  //   - "base64": the base64-decoded character data, or "" when the data
  //     is not valid base64;
  //   - anything else (including "escaped" and no mode): the character data.
  //
  // Unless Type is exactly "text/html", the payload is HTML-escaped. The result
  // is trimmed of leading and trailing whitespace.
  func (a *atom03Text) String() string {
  	var content string

  	switch a.Mode {
  	case "xml":
  		content = a.InnerXML
  	case "base64":
  		// The decoder only skips '\r' and '\n', but pretty-printed feeds
  		// routinely indent or wrap the payload with spaces and tabs. Drop all
  		// whitespace first so such payloads are not silently discarded.
  		encoded := strings.Join(strings.Fields(a.CharData), "")

  		// Best effort: an undecodable payload yields empty content.
  		if decoded, err := base64.StdEncoding.DecodeString(encoded); err == nil {
  			content = string(decoded)
  		}
  	default:
  		content = a.CharData
  	}

  	if a.Type != "text/html" {
  		content = html.EscapeString(content)
  	}

  	return strings.TrimSpace(content)
  }
  147. }