atom_03.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package atom // import "miniflux.app/v2/internal/reader/atom"
  4. import (
  5. "encoding/base64"
  6. "html"
  7. "strings"
  8. "time"
  9. "miniflux.app/v2/internal/crypto"
  10. "miniflux.app/v2/internal/logger"
  11. "miniflux.app/v2/internal/model"
  12. "miniflux.app/v2/internal/reader/date"
  13. "miniflux.app/v2/internal/reader/sanitizer"
  14. "miniflux.app/v2/internal/urllib"
  15. )
  16. // Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html
  17. type atom03Feed struct {
  18. ID string `xml:"id"`
  19. Title atom03Text `xml:"title"`
  20. Author atomPerson `xml:"author"`
  21. Links atomLinks `xml:"link"`
  22. Entries []atom03Entry `xml:"entry"`
  23. }
  24. func (a *atom03Feed) Transform(baseURL string) *model.Feed {
  25. var err error
  26. feed := new(model.Feed)
  27. feedURL := a.Links.firstLinkWithRelation("self")
  28. feed.FeedURL, err = urllib.AbsoluteURL(baseURL, feedURL)
  29. if err != nil {
  30. feed.FeedURL = feedURL
  31. }
  32. siteURL := a.Links.originalLink()
  33. feed.SiteURL, err = urllib.AbsoluteURL(baseURL, siteURL)
  34. if err != nil {
  35. feed.SiteURL = siteURL
  36. }
  37. feed.Title = a.Title.String()
  38. if feed.Title == "" {
  39. feed.Title = feed.SiteURL
  40. }
  41. for _, entry := range a.Entries {
  42. item := entry.Transform()
  43. entryURL, err := urllib.AbsoluteURL(feed.SiteURL, item.URL)
  44. if err == nil {
  45. item.URL = entryURL
  46. }
  47. if item.Author == "" {
  48. item.Author = a.Author.String()
  49. }
  50. if item.Title == "" {
  51. item.Title = sanitizer.TruncateHTML(item.Content, 100)
  52. }
  53. if item.Title == "" {
  54. item.Title = item.URL
  55. }
  56. feed.Entries = append(feed.Entries, item)
  57. }
  58. return feed
  59. }
  60. type atom03Entry struct {
  61. ID string `xml:"id"`
  62. Title atom03Text `xml:"title"`
  63. Modified string `xml:"modified"`
  64. Issued string `xml:"issued"`
  65. Created string `xml:"created"`
  66. Links atomLinks `xml:"link"`
  67. Summary atom03Text `xml:"summary"`
  68. Content atom03Text `xml:"content"`
  69. Author atomPerson `xml:"author"`
  70. }
  71. func (a *atom03Entry) Transform() *model.Entry {
  72. entry := new(model.Entry)
  73. entry.URL = a.Links.originalLink()
  74. entry.Date = a.entryDate()
  75. entry.Author = a.Author.String()
  76. entry.Hash = a.entryHash()
  77. entry.Content = a.entryContent()
  78. entry.Title = a.entryTitle()
  79. return entry
  80. }
  81. func (a *atom03Entry) entryTitle() string {
  82. return sanitizer.StripTags(a.Title.String())
  83. }
  84. func (a *atom03Entry) entryContent() string {
  85. content := a.Content.String()
  86. if content != "" {
  87. return content
  88. }
  89. summary := a.Summary.String()
  90. if summary != "" {
  91. return summary
  92. }
  93. return ""
  94. }
  95. func (a *atom03Entry) entryDate() time.Time {
  96. dateText := ""
  97. for _, value := range []string{a.Issued, a.Modified, a.Created} {
  98. if value != "" {
  99. dateText = value
  100. break
  101. }
  102. }
  103. if dateText != "" {
  104. result, err := date.Parse(dateText)
  105. if err != nil {
  106. logger.Error("atom: %v", err)
  107. return time.Now()
  108. }
  109. return result
  110. }
  111. return time.Now()
  112. }
  113. func (a *atom03Entry) entryHash() string {
  114. for _, value := range []string{a.ID, a.Links.originalLink()} {
  115. if value != "" {
  116. return crypto.Hash(value)
  117. }
  118. }
  119. return ""
  120. }
  // atom03Text is the XML decoding target for an Atom 0.3 text construct such
  // as <title>, <summary> or <content>.
  type atom03Text struct {
  	// Type is the value of the "type" attribute (a media type such as
  	// "text/html").
  	Type string `xml:"type,attr"`

  	// Mode is the value of the "mode" attribute; String recognises "xml",
  	// "escaped" and "base64".
  	Mode string `xml:"mode,attr"`

  	// CharData is the character data of the element.
  	CharData string `xml:",chardata"`

  	// InnerXML is the raw XML nested inside the element.
  	InnerXML string `xml:",innerxml"`
  }

  // String returns the text of the construct, with surrounding whitespace
  // trimmed.
  //
  // The raw value is selected by Mode:
  //   - "xml": the inner XML of the element,
  //   - "base64": the decoded character data, or an empty string when the
  //     payload is not valid base64,
  //   - anything else (including "escaped" and no mode): the character data.
  //
  // Unless Type is "text/html", the selected value is HTML-escaped before
  // being returned.
  func (a *atom03Text) String() string {
  	var content string

  	switch a.Mode {
  	case "xml":
  		content = a.InnerXML
  	case "base64":
  		// Feeds commonly wrap and indent base64 payloads. The decoder
  		// only skips CR and LF, so spaces or tabs would otherwise make
  		// it reject the payload and the content would be lost.
  		payload := strings.Join(strings.Fields(a.CharData), "")
  		if decoded, err := base64.StdEncoding.DecodeString(payload); err == nil {
  			content = string(decoded)
  		}
  	default:
  		content = a.CharData
  	}

  	if a.Type != "text/html" {
  		content = html.EscapeString(content)
  	}

  	return strings.TrimSpace(content)
  }