atom_10_adapter.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package atom // import "miniflux.app/v2/internal/reader/atom"
  4. import (
  5. "log/slog"
  6. "slices"
  7. "sort"
  8. "strconv"
  9. "strings"
  10. "time"
  11. "miniflux.app/v2/internal/crypto"
  12. "miniflux.app/v2/internal/model"
  13. "miniflux.app/v2/internal/reader/date"
  14. "miniflux.app/v2/internal/reader/sanitizer"
  15. "miniflux.app/v2/internal/urllib"
  16. )
  17. type Atom10Adapter struct {
  18. atomFeed *Atom10Feed
  19. }
  20. func NewAtom10Adapter(atomFeed *Atom10Feed) *Atom10Adapter {
  21. return &Atom10Adapter{atomFeed}
  22. }
  23. func (a *Atom10Adapter) BuildFeed(baseURL string) *model.Feed {
  24. feed := new(model.Feed)
  25. // Populate the feed URL.
  26. feedURL := a.atomFeed.Links.firstLinkWithRelation("self")
  27. if feedURL != "" {
  28. if absoluteFeedURL, err := urllib.AbsoluteURL(baseURL, feedURL); err == nil {
  29. feed.FeedURL = absoluteFeedURL
  30. }
  31. } else {
  32. feed.FeedURL = baseURL
  33. }
  34. // Populate the site URL.
  35. siteURL := a.atomFeed.Links.OriginalLink()
  36. if siteURL != "" {
  37. if absoluteSiteURL, err := urllib.AbsoluteURL(baseURL, siteURL); err == nil {
  38. feed.SiteURL = absoluteSiteURL
  39. }
  40. } else {
  41. feed.SiteURL = baseURL
  42. }
  43. // Populate the feed title.
  44. feed.Title = a.atomFeed.Title.Body()
  45. if feed.Title == "" {
  46. feed.Title = feed.SiteURL
  47. }
  48. // Populate the feed icon.
  49. if a.atomFeed.Icon != "" {
  50. if absoluteIconURL, err := urllib.AbsoluteURL(feed.SiteURL, a.atomFeed.Icon); err == nil {
  51. feed.IconURL = absoluteIconURL
  52. }
  53. } else if a.atomFeed.Logo != "" {
  54. if absoluteLogoURL, err := urllib.AbsoluteURL(feed.SiteURL, a.atomFeed.Logo); err == nil {
  55. feed.IconURL = absoluteLogoURL
  56. }
  57. }
  58. feed.Entries = a.populateEntries(feed.SiteURL)
  59. return feed
  60. }
  61. func (a *Atom10Adapter) populateEntries(siteURL string) model.Entries {
  62. entries := make(model.Entries, 0, len(a.atomFeed.Entries))
  63. for _, atomEntry := range a.atomFeed.Entries {
  64. entry := model.NewEntry()
  65. // Populate the entry URL.
  66. entry.URL = atomEntry.Links.OriginalLink()
  67. if entry.URL != "" {
  68. if absoluteEntryURL, err := urllib.AbsoluteURL(siteURL, entry.URL); err == nil {
  69. entry.URL = absoluteEntryURL
  70. }
  71. }
  72. // Populate the entry content.
  73. entry.Content = atomEntry.Content.Body()
  74. if entry.Content == "" {
  75. entry.Content = atomEntry.Summary.Body()
  76. if entry.Content == "" {
  77. entry.Content = atomEntry.FirstMediaDescription()
  78. }
  79. }
  80. // Populate the entry title.
  81. entry.Title = atomEntry.Title.Title()
  82. if entry.Title == "" {
  83. entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
  84. if entry.Title == "" {
  85. entry.Title = entry.URL
  86. }
  87. }
  88. // Populate the entry author.
  89. authors := atomEntry.Authors.PersonNames()
  90. if len(authors) == 0 {
  91. authors = a.atomFeed.Authors.PersonNames()
  92. }
  93. sort.Strings(authors)
  94. authors = slices.Compact(authors)
  95. entry.Author = strings.Join(authors, ", ")
  96. // Populate the entry date.
  97. for _, value := range []string{atomEntry.Published, atomEntry.Updated} {
  98. if value != "" {
  99. if parsedDate, err := date.Parse(value); err != nil {
  100. slog.Debug("Unable to parse date from Atom 1.0 feed",
  101. slog.String("date", value),
  102. slog.String("url", entry.URL),
  103. slog.Any("error", err),
  104. )
  105. } else {
  106. entry.Date = parsedDate
  107. break
  108. }
  109. }
  110. }
  111. if entry.Date.IsZero() {
  112. entry.Date = time.Now()
  113. }
  114. // Populate categories.
  115. categories := atomEntry.Categories.CategoryNames()
  116. if len(categories) == 0 {
  117. categories = a.atomFeed.Categories.CategoryNames()
  118. }
  119. sort.Strings(categories)
  120. entry.Tags = slices.Compact(categories)
  121. // Populate the commentsURL if defined.
  122. // See https://tools.ietf.org/html/rfc4685#section-4
  123. // If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
  124. // We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
  125. commentsURL := atomEntry.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml")
  126. if urllib.IsAbsoluteURL(commentsURL) {
  127. entry.CommentsURL = commentsURL
  128. }
  129. // Generate the entry hash.
  130. for _, value := range []string{atomEntry.ID, atomEntry.Links.OriginalLink()} {
  131. if value != "" {
  132. entry.Hash = crypto.Hash(value)
  133. break
  134. }
  135. }
  136. // Populate the entry enclosures.
  137. uniqueEnclosuresMap := make(map[string]bool)
  138. for _, mediaThumbnail := range atomEntry.AllMediaThumbnails() {
  139. mediaURL := strings.TrimSpace(mediaThumbnail.URL)
  140. if mediaURL == "" {
  141. continue
  142. }
  143. if _, found := uniqueEnclosuresMap[mediaURL]; !found {
  144. if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
  145. slog.Debug("Unable to build absolute URL for media thumbnail",
  146. slog.String("url", mediaThumbnail.URL),
  147. slog.String("site_url", siteURL),
  148. slog.Any("error", err),
  149. )
  150. } else {
  151. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  152. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  153. URL: mediaAbsoluteURL,
  154. MimeType: mediaThumbnail.MimeType(),
  155. Size: mediaThumbnail.Size(),
  156. })
  157. }
  158. }
  159. }
  160. for _, link := range atomEntry.Links.findAllLinksWithRelation("enclosure") {
  161. absoluteEnclosureURL, err := urllib.AbsoluteURL(siteURL, link.Href)
  162. if err != nil {
  163. slog.Debug("Unable to resolve absolute URL for enclosure",
  164. slog.String("enclosure_url", link.Href),
  165. slog.String("entry_url", entry.URL),
  166. slog.Any("error", err),
  167. )
  168. } else {
  169. if _, found := uniqueEnclosuresMap[absoluteEnclosureURL]; !found {
  170. uniqueEnclosuresMap[absoluteEnclosureURL] = true
  171. length, _ := strconv.ParseInt(link.Length, 10, 0)
  172. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  173. URL: absoluteEnclosureURL,
  174. MimeType: link.Type,
  175. Size: length,
  176. })
  177. }
  178. }
  179. }
  180. for _, mediaContent := range atomEntry.AllMediaContents() {
  181. mediaURL := strings.TrimSpace(mediaContent.URL)
  182. if mediaURL == "" {
  183. continue
  184. }
  185. if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
  186. slog.Debug("Unable to build absolute URL for media content",
  187. slog.String("url", mediaContent.URL),
  188. slog.String("site_url", siteURL),
  189. slog.Any("error", err),
  190. )
  191. } else {
  192. if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; !found {
  193. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  194. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  195. URL: mediaAbsoluteURL,
  196. MimeType: mediaContent.MimeType(),
  197. Size: mediaContent.Size(),
  198. })
  199. }
  200. }
  201. }
  202. for _, mediaPeerLink := range atomEntry.AllMediaPeerLinks() {
  203. mediaURL := strings.TrimSpace(mediaPeerLink.URL)
  204. if mediaURL == "" {
  205. continue
  206. }
  207. if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
  208. slog.Debug("Unable to build absolute URL for media peer link",
  209. slog.String("url", mediaPeerLink.URL),
  210. slog.String("site_url", siteURL),
  211. slog.Any("error", err),
  212. )
  213. } else {
  214. if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; !found {
  215. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  216. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  217. URL: mediaAbsoluteURL,
  218. MimeType: mediaPeerLink.MimeType(),
  219. Size: mediaPeerLink.Size(),
  220. })
  221. }
  222. }
  223. }
  224. entries = append(entries, entry)
  225. }
  226. return entries
  227. }