atom_10_adapter.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package atom // import "miniflux.app/v2/internal/reader/atom"
  4. import (
  5. "log/slog"
  6. "slices"
  7. "sort"
  8. "strconv"
  9. "strings"
  10. "time"
  11. "miniflux.app/v2/internal/crypto"
  12. "miniflux.app/v2/internal/model"
  13. "miniflux.app/v2/internal/reader/date"
  14. "miniflux.app/v2/internal/reader/sanitizer"
  15. "miniflux.app/v2/internal/urllib"
  16. )
  17. type Atom10Adapter struct {
  18. atomFeed *Atom10Feed
  19. }
  20. func NewAtom10Adapter(atomFeed *Atom10Feed) *Atom10Adapter {
  21. return &Atom10Adapter{atomFeed}
  22. }
  23. func (a *Atom10Adapter) BuildFeed(baseURL string) *model.Feed {
  24. feed := new(model.Feed)
  25. // Populate the feed URL.
  26. feedURL := a.atomFeed.Links.firstLinkWithRelation("self")
  27. if feedURL != "" {
  28. if absoluteFeedURL, err := urllib.AbsoluteURL(baseURL, feedURL); err == nil {
  29. feed.FeedURL = absoluteFeedURL
  30. }
  31. } else {
  32. feed.FeedURL = baseURL
  33. }
  34. // Populate the site URL.
  35. siteURL := a.atomFeed.Links.OriginalLink()
  36. if siteURL != "" {
  37. if absoluteSiteURL, err := urllib.AbsoluteURL(baseURL, siteURL); err == nil {
  38. feed.SiteURL = absoluteSiteURL
  39. }
  40. } else {
  41. feed.SiteURL = baseURL
  42. }
  43. // Populate the feed title.
  44. feed.Title = a.atomFeed.Title.Body()
  45. if feed.Title == "" {
  46. feed.Title = feed.SiteURL
  47. }
  48. // Populate the feed icon.
  49. if a.atomFeed.Icon != "" {
  50. if absoluteIconURL, err := urllib.AbsoluteURL(feed.SiteURL, a.atomFeed.Icon); err == nil {
  51. feed.IconURL = absoluteIconURL
  52. }
  53. } else if a.atomFeed.Logo != "" {
  54. if absoluteLogoURL, err := urllib.AbsoluteURL(feed.SiteURL, a.atomFeed.Logo); err == nil {
  55. feed.IconURL = absoluteLogoURL
  56. }
  57. }
  58. for _, atomEntry := range a.atomFeed.Entries {
  59. entry := model.NewEntry()
  60. // Populate the entry URL.
  61. entry.URL = atomEntry.Links.OriginalLink()
  62. if entry.URL != "" {
  63. if absoluteEntryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL); err == nil {
  64. entry.URL = absoluteEntryURL
  65. }
  66. }
  67. // Populate the entry content.
  68. entry.Content = atomEntry.Content.Body()
  69. if entry.Content == "" {
  70. entry.Content = atomEntry.Summary.Body()
  71. }
  72. if entry.Content == "" {
  73. entry.Content = atomEntry.FirstMediaDescription()
  74. }
  75. // Populate the entry title.
  76. entry.Title = atomEntry.Title.Title()
  77. if entry.Title == "" {
  78. entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
  79. }
  80. if entry.Title == "" {
  81. entry.Title = entry.URL
  82. }
  83. // Populate the entry author.
  84. authors := atomEntry.Authors.PersonNames()
  85. if len(authors) == 0 {
  86. authors = append(authors, a.atomFeed.Authors.PersonNames()...)
  87. }
  88. authors = slices.Compact(authors)
  89. sort.Strings(authors)
  90. entry.Author = strings.Join(authors, ", ")
  91. // Populate the entry date.
  92. for _, value := range []string{atomEntry.Published, atomEntry.Updated} {
  93. if parsedDate, err := date.Parse(value); err != nil {
  94. slog.Debug("Unable to parse date from Atom 1.0 feed",
  95. slog.String("date", value),
  96. slog.String("url", entry.URL),
  97. slog.Any("error", err),
  98. )
  99. } else {
  100. entry.Date = parsedDate
  101. break
  102. }
  103. }
  104. if entry.Date.IsZero() {
  105. entry.Date = time.Now()
  106. }
  107. // Populate categories.
  108. categories := atomEntry.Categories.CategoryNames()
  109. if len(categories) == 0 {
  110. categories = append(categories, a.atomFeed.Categories.CategoryNames()...)
  111. }
  112. if len(categories) > 0 {
  113. categories = slices.Compact(categories)
  114. sort.Strings(categories)
  115. entry.Tags = categories
  116. }
  117. // Populate the commentsURL if defined.
  118. // See https://tools.ietf.org/html/rfc4685#section-4
  119. // If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
  120. // We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
  121. commentsURL := atomEntry.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml")
  122. if urllib.IsAbsoluteURL(commentsURL) {
  123. entry.CommentsURL = commentsURL
  124. }
  125. // Generate the entry hash.
  126. for _, value := range []string{atomEntry.ID, atomEntry.Links.OriginalLink()} {
  127. if value != "" {
  128. entry.Hash = crypto.Hash(value)
  129. break
  130. }
  131. }
  132. // Populate the entry enclosures.
  133. uniqueEnclosuresMap := make(map[string]bool)
  134. for _, mediaThumbnail := range atomEntry.AllMediaThumbnails() {
  135. if _, found := uniqueEnclosuresMap[mediaThumbnail.URL]; !found {
  136. uniqueEnclosuresMap[mediaThumbnail.URL] = true
  137. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  138. URL: mediaThumbnail.URL,
  139. MimeType: mediaThumbnail.MimeType(),
  140. Size: mediaThumbnail.Size(),
  141. })
  142. }
  143. }
  144. for _, link := range atomEntry.Links {
  145. if strings.EqualFold(link.Rel, "enclosure") {
  146. if link.Href == "" {
  147. continue
  148. }
  149. if _, found := uniqueEnclosuresMap[link.Href]; !found {
  150. uniqueEnclosuresMap[link.Href] = true
  151. length, _ := strconv.ParseInt(link.Length, 10, 0)
  152. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  153. URL: link.Href,
  154. MimeType: link.Type,
  155. Size: length,
  156. })
  157. }
  158. }
  159. }
  160. for _, mediaContent := range atomEntry.AllMediaContents() {
  161. if _, found := uniqueEnclosuresMap[mediaContent.URL]; !found {
  162. uniqueEnclosuresMap[mediaContent.URL] = true
  163. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  164. URL: mediaContent.URL,
  165. MimeType: mediaContent.MimeType(),
  166. Size: mediaContent.Size(),
  167. })
  168. }
  169. }
  170. for _, mediaPeerLink := range atomEntry.AllMediaPeerLinks() {
  171. if _, found := uniqueEnclosuresMap[mediaPeerLink.URL]; !found {
  172. uniqueEnclosuresMap[mediaPeerLink.URL] = true
  173. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  174. URL: mediaPeerLink.URL,
  175. MimeType: mediaPeerLink.MimeType(),
  176. Size: mediaPeerLink.Size(),
  177. })
  178. }
  179. }
  180. feed.Entries = append(feed.Entries, entry)
  181. }
  182. return feed
  183. }