4
0

atom_10_adapter.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package atom // import "miniflux.app/v2/internal/reader/atom"
  4. import (
  5. "log/slog"
  6. "slices"
  7. "sort"
  8. "strconv"
  9. "strings"
  10. "time"
  11. "miniflux.app/v2/internal/crypto"
  12. "miniflux.app/v2/internal/model"
  13. "miniflux.app/v2/internal/reader/date"
  14. "miniflux.app/v2/internal/reader/sanitizer"
  15. "miniflux.app/v2/internal/urllib"
  16. )
// atom10Adapter wraps a parsed Atom 1.0 feed so that it can be converted into
// the application's model types (see BuildFeed).
type atom10Adapter struct {
	// atomFeed is the parsed Atom 1.0 document that the adapter reads from.
	atomFeed *atom10Feed
}
  20. func NewAtom10Adapter(atomFeed *atom10Feed) *atom10Adapter {
  21. return &atom10Adapter{atomFeed}
  22. }
  23. func (a *atom10Adapter) BuildFeed(baseURL string) *model.Feed {
  24. feed := &model.Feed{
  25. FeedURL: baseURL,
  26. SiteURL: baseURL,
  27. }
  28. // Populate the feed URL.
  29. feedURL := a.atomFeed.Links.firstLinkWithRelation("self")
  30. if feedURL != "" {
  31. if absoluteFeedURL, err := urllib.ResolveToAbsoluteURL(baseURL, feedURL); err == nil {
  32. feed.FeedURL = absoluteFeedURL
  33. }
  34. }
  35. // Populate the site URL.
  36. siteURL := a.atomFeed.Links.originalLink()
  37. if siteURL != "" {
  38. if absoluteSiteURL, err := urllib.ResolveToAbsoluteURL(baseURL, siteURL); err == nil {
  39. feed.SiteURL = absoluteSiteURL
  40. }
  41. }
  42. // Populate the feed title.
  43. feed.Title = a.atomFeed.Title.body()
  44. if feed.Title == "" {
  45. feed.Title = feed.SiteURL
  46. }
  47. // Populate the feed description.
  48. feed.Description = a.atomFeed.Subtitle.body()
  49. // Populate the feed icon.
  50. for _, value := range []string{a.atomFeed.Icon, a.atomFeed.Logo} {
  51. if value = strings.TrimSpace(value); value == "" {
  52. continue
  53. }
  54. if iconURL, err := urllib.ResolveToAbsoluteURL(feed.SiteURL, value); err == nil {
  55. feed.IconURL = iconURL
  56. break
  57. }
  58. }
  59. feed.Entries = a.populateEntries(feed.SiteURL)
  60. return feed
  61. }
  62. func (a *atom10Adapter) populateEntries(siteURL string) model.Entries {
  63. entries := make(model.Entries, 0, len(a.atomFeed.Entries))
  64. for _, atomEntry := range a.atomFeed.Entries {
  65. entry := model.NewEntry()
  66. // Populate the entry URL.
  67. entry.URL = atomEntry.Links.originalLink()
  68. if entry.URL != "" {
  69. if absoluteEntryURL, err := urllib.ResolveToAbsoluteURL(siteURL, entry.URL); err == nil {
  70. entry.URL = absoluteEntryURL
  71. }
  72. }
  73. // Populate the entry content.
  74. entry.Content = atomEntry.Content.body()
  75. if entry.Content == "" {
  76. entry.Content = atomEntry.Summary.body()
  77. if entry.Content == "" {
  78. entry.Content = atomEntry.FirstMediaDescription()
  79. }
  80. }
  81. // Populate the entry title.
  82. entry.Title = atomEntry.Title.title()
  83. if entry.Title == "" {
  84. entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
  85. if entry.Title == "" {
  86. entry.Title = entry.URL
  87. }
  88. }
  89. // Populate the entry author.
  90. authors := atomEntry.Authors.personNames()
  91. if len(authors) == 0 {
  92. authors = a.atomFeed.Authors.personNames()
  93. }
  94. sort.Strings(authors)
  95. authors = slices.Compact(authors)
  96. entry.Author = strings.Join(authors, ", ")
  97. // Populate the entry date.
  98. for _, value := range []string{atomEntry.Published, atomEntry.Updated} {
  99. if value = strings.TrimSpace(value); value == "" {
  100. continue
  101. }
  102. parsedDate, err := date.Parse(value)
  103. if err != nil {
  104. slog.Debug("Unable to parse date from Atom 1.0 feed",
  105. slog.String("date", value),
  106. slog.String("url", entry.URL),
  107. slog.Any("error", err),
  108. )
  109. continue
  110. }
  111. entry.Date = parsedDate
  112. break
  113. }
  114. if entry.Date.IsZero() {
  115. entry.Date = time.Now()
  116. }
  117. // Populate categories.
  118. categories := atomEntry.Categories.CategoryNames()
  119. if len(categories) == 0 {
  120. categories = a.atomFeed.Categories.CategoryNames()
  121. }
  122. // Sort and deduplicate categories.
  123. sort.Strings(categories)
  124. entry.Tags = slices.Compact(categories)
  125. // Populate the commentsURL if defined.
  126. // See https://tools.ietf.org/html/rfc4685#section-4
  127. // If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
  128. // We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
  129. commentsURL := atomEntry.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml")
  130. if urllib.IsAbsoluteURL(commentsURL) {
  131. entry.CommentsURL = commentsURL
  132. }
  133. // Generate the entry hash.
  134. for _, value := range []string{atomEntry.ID, atomEntry.Links.originalLink()} {
  135. if value != "" {
  136. entry.Hash = crypto.SHA256(value)
  137. break
  138. }
  139. }
  140. // Populate the entry enclosures.
  141. uniqueEnclosuresMap := make(map[string]bool)
  142. for _, mediaThumbnail := range atomEntry.AllMediaThumbnails() {
  143. mediaURL := strings.TrimSpace(mediaThumbnail.URL)
  144. if mediaURL == "" {
  145. continue
  146. }
  147. if _, found := uniqueEnclosuresMap[mediaURL]; found {
  148. continue
  149. }
  150. mediaAbsoluteURL, err := urllib.ResolveToAbsoluteURL(siteURL, mediaURL)
  151. if err != nil {
  152. slog.Debug("Unable to build absolute URL for media thumbnail",
  153. slog.String("url", mediaThumbnail.URL),
  154. slog.String("site_url", siteURL),
  155. slog.Any("error", err),
  156. )
  157. continue
  158. }
  159. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  160. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  161. URL: mediaAbsoluteURL,
  162. MimeType: mediaThumbnail.MimeType(),
  163. Size: mediaThumbnail.Size(),
  164. })
  165. }
  166. for _, link := range atomEntry.Links.findAllLinksWithRelation("enclosure") {
  167. absoluteEnclosureURL, err := urllib.ResolveToAbsoluteURL(siteURL, link.Href)
  168. if err != nil {
  169. slog.Debug("Unable to resolve absolute URL for enclosure",
  170. slog.String("enclosure_url", link.Href),
  171. slog.String("entry_url", entry.URL),
  172. slog.Any("error", err),
  173. )
  174. continue
  175. }
  176. if _, found := uniqueEnclosuresMap[absoluteEnclosureURL]; found {
  177. continue
  178. }
  179. uniqueEnclosuresMap[absoluteEnclosureURL] = true
  180. length, _ := strconv.ParseInt(link.Length, 10, 0)
  181. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  182. URL: absoluteEnclosureURL,
  183. MimeType: link.Type,
  184. Size: length,
  185. })
  186. }
  187. for _, mediaContent := range atomEntry.AllMediaContents() {
  188. mediaURL := strings.TrimSpace(mediaContent.URL)
  189. if mediaURL == "" {
  190. continue
  191. }
  192. mediaAbsoluteURL, err := urllib.ResolveToAbsoluteURL(siteURL, mediaURL)
  193. if err != nil {
  194. slog.Debug("Unable to build absolute URL for media content",
  195. slog.String("url", mediaContent.URL),
  196. slog.String("site_url", siteURL),
  197. slog.Any("error", err),
  198. )
  199. continue
  200. }
  201. if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; found {
  202. continue
  203. }
  204. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  205. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  206. URL: mediaAbsoluteURL,
  207. MimeType: mediaContent.MimeType(),
  208. Size: mediaContent.Size(),
  209. })
  210. }
  211. for _, mediaPeerLink := range atomEntry.AllMediaPeerLinks() {
  212. mediaURL := strings.TrimSpace(mediaPeerLink.URL)
  213. if mediaURL == "" {
  214. continue
  215. }
  216. mediaAbsoluteURL, err := urllib.ResolveToAbsoluteURL(siteURL, mediaURL)
  217. if err != nil {
  218. slog.Debug("Unable to build absolute URL for media peer link",
  219. slog.String("url", mediaPeerLink.URL),
  220. slog.String("site_url", siteURL),
  221. slog.Any("error", err),
  222. )
  223. continue
  224. }
  225. if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; found {
  226. continue
  227. }
  228. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  229. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  230. URL: mediaAbsoluteURL,
  231. MimeType: mediaPeerLink.MimeType(),
  232. Size: mediaPeerLink.Size(),
  233. })
  234. }
  235. entries = append(entries, entry)
  236. }
  237. return entries
  238. }