atom_03.go 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package atom // import "miniflux.app/v2/internal/reader/atom"
  4. import (
  5. "encoding/base64"
  6. "html"
  7. "log/slog"
  8. "strings"
  9. "time"
  10. "miniflux.app/v2/internal/crypto"
  11. "miniflux.app/v2/internal/model"
  12. "miniflux.app/v2/internal/reader/date"
  13. "miniflux.app/v2/internal/reader/sanitizer"
  14. "miniflux.app/v2/internal/urllib"
  15. )
  16. // Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html
  17. type atom03Feed struct {
  18. ID string `xml:"id"`
  19. Title atom03Text `xml:"title"`
  20. Author atomPerson `xml:"author"`
  21. Links atomLinks `xml:"link"`
  22. Entries []atom03Entry `xml:"entry"`
  23. }
  24. func (a *atom03Feed) Transform(baseURL string) *model.Feed {
  25. var err error
  26. feed := new(model.Feed)
  27. feedURL := a.Links.firstLinkWithRelation("self")
  28. feed.FeedURL, err = urllib.AbsoluteURL(baseURL, feedURL)
  29. if err != nil {
  30. feed.FeedURL = feedURL
  31. }
  32. siteURL := a.Links.originalLink()
  33. feed.SiteURL, err = urllib.AbsoluteURL(baseURL, siteURL)
  34. if err != nil {
  35. feed.SiteURL = siteURL
  36. }
  37. feed.Title = a.Title.String()
  38. if feed.Title == "" {
  39. feed.Title = feed.SiteURL
  40. }
  41. for _, entry := range a.Entries {
  42. item := entry.Transform()
  43. entryURL, err := urllib.AbsoluteURL(feed.SiteURL, item.URL)
  44. if err == nil {
  45. item.URL = entryURL
  46. }
  47. if item.Author == "" {
  48. item.Author = a.Author.String()
  49. }
  50. if item.Title == "" {
  51. item.Title = sanitizer.TruncateHTML(item.Content, 100)
  52. }
  53. if item.Title == "" {
  54. item.Title = item.URL
  55. }
  56. feed.Entries = append(feed.Entries, item)
  57. }
  58. return feed
  59. }
  60. type atom03Entry struct {
  61. ID string `xml:"id"`
  62. Title atom03Text `xml:"title"`
  63. Modified string `xml:"modified"`
  64. Issued string `xml:"issued"`
  65. Created string `xml:"created"`
  66. Links atomLinks `xml:"link"`
  67. Summary atom03Text `xml:"summary"`
  68. Content atom03Text `xml:"content"`
  69. Author atomPerson `xml:"author"`
  70. }
  71. func (a *atom03Entry) Transform() *model.Entry {
  72. entry := model.NewEntry()
  73. entry.URL = a.Links.originalLink()
  74. entry.Date = a.entryDate()
  75. entry.Author = a.Author.String()
  76. entry.Hash = a.entryHash()
  77. entry.Content = a.entryContent()
  78. entry.Title = a.entryTitle()
  79. return entry
  80. }
  81. func (a *atom03Entry) entryTitle() string {
  82. return sanitizer.StripTags(a.Title.String())
  83. }
  84. func (a *atom03Entry) entryContent() string {
  85. content := a.Content.String()
  86. if content != "" {
  87. return content
  88. }
  89. summary := a.Summary.String()
  90. if summary != "" {
  91. return summary
  92. }
  93. return ""
  94. }
  95. func (a *atom03Entry) entryDate() time.Time {
  96. dateText := ""
  97. for _, value := range []string{a.Issued, a.Modified, a.Created} {
  98. if value != "" {
  99. dateText = value
  100. break
  101. }
  102. }
  103. if dateText != "" {
  104. result, err := date.Parse(dateText)
  105. if err != nil {
  106. slog.Debug("Unable to parse date from Atom 0.3 feed",
  107. slog.String("date", dateText),
  108. slog.String("id", a.ID),
  109. slog.Any("error", err),
  110. )
  111. return time.Now()
  112. }
  113. return result
  114. }
  115. return time.Now()
  116. }
  117. func (a *atom03Entry) entryHash() string {
  118. for _, value := range []string{a.ID, a.Links.originalLink()} {
  119. if value != "" {
  120. return crypto.Hash(value)
  121. }
  122. }
  123. return ""
  124. }
  // atom03Text is the decoding target for an Atom 0.3 text element such as
  // <title>, <summary> or <content>.
  type atom03Text struct {
  	// Type is the value of the "type" attribute.
  	Type string `xml:"type,attr"`

  	// Mode is the value of the "mode" attribute; String recognises "xml",
  	// "escaped" and "base64".
  	Mode string `xml:"mode,attr"`

  	// CharData is the character data of the element.
  	CharData string `xml:",chardata"`

  	// InnerXML is the raw XML nested inside the element.
  	InnerXML string `xml:",innerxml"`
  }

  // String returns the text of the element according to its mode:
  //
  //   - "xml": the raw inner XML;
  //   - "base64": the decoded character data, or an empty string when the
  //     payload is not valid base64;
  //   - "escaped", or any other value: the character data as is.
  //
  // Unless Type is "text/html" the result is HTML-escaped. Leading and trailing
  // whitespace is always removed.
  func (a *atom03Text) String() string {
  	var content string

  	switch a.Mode {
  	case "xml":
  		content = a.InnerXML
  	case "base64":
  		// Character data taken from a pretty-printed document is usually
  		// surrounded by, and wrapped with, indentation. The decoder only skips
  		// "\r" and "\n", so every other whitespace character has to be removed
  		// first or the whole payload would be rejected.
  		payload := strings.Join(strings.Fields(a.CharData), "")

  		decoded, err := base64.StdEncoding.DecodeString(payload)
  		if err != nil {
  			// Best effort: an undecodable payload yields empty content.
  			slog.Debug("Unable to decode base64 content from Atom 0.3 feed",
  				slog.Any("error", err),
  			)
  		} else {
  			content = string(decoded)
  		}
  	default:
  		// Covers mode "escaped" as well as a missing or unknown mode.
  		content = a.CharData
  	}

  	if a.Type != "text/html" {
  		content = html.EscapeString(content)
  	}

  	return strings.TrimSpace(content)
  }