atom_10_adapter.go 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package atom // import "miniflux.app/v2/internal/reader/atom"
  4. import (
  5. "log/slog"
  6. "slices"
  7. "sort"
  8. "strconv"
  9. "strings"
  10. "time"
  11. "miniflux.app/v2/internal/crypto"
  12. "miniflux.app/v2/internal/model"
  13. "miniflux.app/v2/internal/reader/date"
  14. "miniflux.app/v2/internal/reader/sanitizer"
  15. "miniflux.app/v2/internal/urllib"
  16. )
  17. type atom10Adapter struct {
  18. atomFeed *atom10Feed
  19. }
  20. func NewAtom10Adapter(atomFeed *atom10Feed) *atom10Adapter {
  21. return &atom10Adapter{atomFeed}
  22. }
  23. func (a *atom10Adapter) BuildFeed(baseURL string) *model.Feed {
  24. feed := new(model.Feed)
  25. // Populate the feed URL.
  26. feedURL := a.atomFeed.Links.firstLinkWithRelation("self")
  27. if feedURL != "" {
  28. if absoluteFeedURL, err := urllib.ResolveToAbsoluteURL(baseURL, feedURL); err == nil {
  29. feed.FeedURL = absoluteFeedURL
  30. }
  31. } else {
  32. feed.FeedURL = baseURL
  33. }
  34. // Populate the site URL.
  35. siteURL := a.atomFeed.Links.originalLink()
  36. if siteURL != "" {
  37. if absoluteSiteURL, err := urllib.ResolveToAbsoluteURL(baseURL, siteURL); err == nil {
  38. feed.SiteURL = absoluteSiteURL
  39. }
  40. } else {
  41. feed.SiteURL = baseURL
  42. }
  43. // Populate the feed title.
  44. feed.Title = a.atomFeed.Title.body()
  45. if feed.Title == "" {
  46. feed.Title = feed.SiteURL
  47. }
  48. // Populate the feed description.
  49. feed.Description = a.atomFeed.Subtitle.body()
  50. // Populate the feed icon.
  51. if a.atomFeed.Icon != "" {
  52. if absoluteIconURL, err := urllib.ResolveToAbsoluteURL(feed.SiteURL, a.atomFeed.Icon); err == nil {
  53. feed.IconURL = absoluteIconURL
  54. }
  55. } else if a.atomFeed.Logo != "" {
  56. if absoluteLogoURL, err := urllib.ResolveToAbsoluteURL(feed.SiteURL, a.atomFeed.Logo); err == nil {
  57. feed.IconURL = absoluteLogoURL
  58. }
  59. }
  60. feed.Entries = a.populateEntries(feed.SiteURL)
  61. return feed
  62. }
  63. func (a *atom10Adapter) populateEntries(siteURL string) model.Entries {
  64. entries := make(model.Entries, 0, len(a.atomFeed.Entries))
  65. for _, atomEntry := range a.atomFeed.Entries {
  66. entry := model.NewEntry()
  67. // Populate the entry URL.
  68. entry.URL = atomEntry.Links.originalLink()
  69. if entry.URL != "" {
  70. if absoluteEntryURL, err := urllib.ResolveToAbsoluteURL(siteURL, entry.URL); err == nil {
  71. entry.URL = absoluteEntryURL
  72. }
  73. }
  74. // Populate the entry content.
  75. entry.Content = atomEntry.Content.body()
  76. if entry.Content == "" {
  77. entry.Content = atomEntry.Summary.body()
  78. if entry.Content == "" {
  79. entry.Content = atomEntry.FirstMediaDescription()
  80. }
  81. }
  82. // Populate the entry title.
  83. entry.Title = atomEntry.Title.title()
  84. if entry.Title == "" {
  85. entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
  86. if entry.Title == "" {
  87. entry.Title = entry.URL
  88. }
  89. }
  90. // Populate the entry author.
  91. authors := atomEntry.Authors.personNames()
  92. if len(authors) == 0 {
  93. authors = a.atomFeed.Authors.personNames()
  94. }
  95. sort.Strings(authors)
  96. authors = slices.Compact(authors)
  97. entry.Author = strings.Join(authors, ", ")
  98. // Populate the entry date.
  99. for _, value := range []string{atomEntry.Published, atomEntry.Updated} {
  100. if value != "" {
  101. if parsedDate, err := date.Parse(value); err != nil {
  102. slog.Debug("Unable to parse date from Atom 1.0 feed",
  103. slog.String("date", value),
  104. slog.String("url", entry.URL),
  105. slog.Any("error", err),
  106. )
  107. } else {
  108. entry.Date = parsedDate
  109. break
  110. }
  111. }
  112. }
  113. if entry.Date.IsZero() {
  114. entry.Date = time.Now()
  115. }
  116. // Populate categories.
  117. categories := atomEntry.Categories.CategoryNames()
  118. if len(categories) == 0 {
  119. categories = a.atomFeed.Categories.CategoryNames()
  120. }
  121. // Sort and deduplicate categories.
  122. sort.Strings(categories)
  123. entry.Tags = slices.Compact(categories)
  124. // Populate the commentsURL if defined.
  125. // See https://tools.ietf.org/html/rfc4685#section-4
  126. // If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
  127. // We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
  128. commentsURL := atomEntry.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml")
  129. if urllib.IsAbsoluteURL(commentsURL) {
  130. entry.CommentsURL = commentsURL
  131. }
  132. // Generate the entry hash.
  133. for _, value := range []string{atomEntry.ID, atomEntry.Links.originalLink()} {
  134. if value != "" {
  135. entry.Hash = crypto.SHA256(value)
  136. break
  137. }
  138. }
  139. // Populate the entry enclosures.
  140. uniqueEnclosuresMap := make(map[string]bool)
  141. for _, mediaThumbnail := range atomEntry.AllMediaThumbnails() {
  142. mediaURL := strings.TrimSpace(mediaThumbnail.URL)
  143. if mediaURL == "" {
  144. continue
  145. }
  146. if _, found := uniqueEnclosuresMap[mediaURL]; !found {
  147. if mediaAbsoluteURL, err := urllib.ResolveToAbsoluteURL(siteURL, mediaURL); err != nil {
  148. slog.Debug("Unable to build absolute URL for media thumbnail",
  149. slog.String("url", mediaThumbnail.URL),
  150. slog.String("site_url", siteURL),
  151. slog.Any("error", err),
  152. )
  153. } else {
  154. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  155. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  156. URL: mediaAbsoluteURL,
  157. MimeType: mediaThumbnail.MimeType(),
  158. Size: mediaThumbnail.Size(),
  159. })
  160. }
  161. }
  162. }
  163. for _, link := range atomEntry.Links.findAllLinksWithRelation("enclosure") {
  164. absoluteEnclosureURL, err := urllib.ResolveToAbsoluteURL(siteURL, link.Href)
  165. if err != nil {
  166. slog.Debug("Unable to resolve absolute URL for enclosure",
  167. slog.String("enclosure_url", link.Href),
  168. slog.String("entry_url", entry.URL),
  169. slog.Any("error", err),
  170. )
  171. } else {
  172. if _, found := uniqueEnclosuresMap[absoluteEnclosureURL]; !found {
  173. uniqueEnclosuresMap[absoluteEnclosureURL] = true
  174. length, _ := strconv.ParseInt(link.Length, 10, 0)
  175. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  176. URL: absoluteEnclosureURL,
  177. MimeType: link.Type,
  178. Size: length,
  179. })
  180. }
  181. }
  182. }
  183. for _, mediaContent := range atomEntry.AllMediaContents() {
  184. mediaURL := strings.TrimSpace(mediaContent.URL)
  185. if mediaURL == "" {
  186. continue
  187. }
  188. if mediaAbsoluteURL, err := urllib.ResolveToAbsoluteURL(siteURL, mediaURL); err != nil {
  189. slog.Debug("Unable to build absolute URL for media content",
  190. slog.String("url", mediaContent.URL),
  191. slog.String("site_url", siteURL),
  192. slog.Any("error", err),
  193. )
  194. } else {
  195. if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; !found {
  196. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  197. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  198. URL: mediaAbsoluteURL,
  199. MimeType: mediaContent.MimeType(),
  200. Size: mediaContent.Size(),
  201. })
  202. }
  203. }
  204. }
  205. for _, mediaPeerLink := range atomEntry.AllMediaPeerLinks() {
  206. mediaURL := strings.TrimSpace(mediaPeerLink.URL)
  207. if mediaURL == "" {
  208. continue
  209. }
  210. if mediaAbsoluteURL, err := urllib.ResolveToAbsoluteURL(siteURL, mediaURL); err != nil {
  211. slog.Debug("Unable to build absolute URL for media peer link",
  212. slog.String("url", mediaPeerLink.URL),
  213. slog.String("site_url", siteURL),
  214. slog.Any("error", err),
  215. )
  216. } else {
  217. if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; !found {
  218. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  219. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  220. URL: mediaAbsoluteURL,
  221. MimeType: mediaPeerLink.MimeType(),
  222. Size: mediaPeerLink.Size(),
  223. })
  224. }
  225. }
  226. }
  227. entries = append(entries, entry)
  228. }
  229. return entries
  230. }