atom_10_adapter.go 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package atom // import "miniflux.app/v2/internal/reader/atom"
  4. import (
  5. "log/slog"
  6. "strconv"
  7. "strings"
  8. "time"
  9. "miniflux.app/v2/internal/crypto"
  10. "miniflux.app/v2/internal/model"
  11. "miniflux.app/v2/internal/reader/date"
  12. "miniflux.app/v2/internal/reader/sanitizer"
  13. "miniflux.app/v2/internal/urllib"
  14. )
  15. type atom10Adapter struct {
  16. atomFeed *atom10Feed
  17. }
  18. func NewAtom10Adapter(atomFeed *atom10Feed) *atom10Adapter {
  19. return &atom10Adapter{atomFeed}
  20. }
  21. func (a *atom10Adapter) BuildFeed(baseURL string) *model.Feed {
  22. feed := &model.Feed{
  23. FeedURL: baseURL,
  24. SiteURL: baseURL,
  25. }
  26. // Populate the feed URL.
  27. feedURL := a.atomFeed.Links.firstLinkWithRelation("self")
  28. if feedURL != "" {
  29. if absoluteFeedURL, err := urllib.ResolveToAbsoluteURL(baseURL, feedURL); err == nil {
  30. feed.FeedURL = absoluteFeedURL
  31. }
  32. }
  33. // Populate the site URL.
  34. siteURL := a.atomFeed.Links.originalLink()
  35. if siteURL != "" {
  36. if absoluteSiteURL, err := urllib.ResolveToAbsoluteURL(baseURL, siteURL); err == nil {
  37. feed.SiteURL = absoluteSiteURL
  38. }
  39. }
  40. // Populate the feed title.
  41. feed.Title = a.atomFeed.Title.body()
  42. if feed.Title == "" {
  43. feed.Title = feed.SiteURL
  44. }
  45. // Populate the feed description.
  46. feed.Description = a.atomFeed.Subtitle.body()
  47. // Populate the feed icon.
  48. for _, value := range []string{a.atomFeed.Icon, a.atomFeed.Logo} {
  49. if value = strings.TrimSpace(value); value == "" {
  50. continue
  51. }
  52. if iconURL, err := urllib.ResolveToAbsoluteURL(feed.SiteURL, value); err == nil {
  53. feed.IconURL = iconURL
  54. break
  55. }
  56. }
  57. feed.Entries = a.populateEntries(feed.SiteURL)
  58. return feed
  59. }
  60. func (a *atom10Adapter) populateEntries(siteURL string) model.Entries {
  61. entries := make(model.Entries, 0, len(a.atomFeed.Entries))
  62. for _, atomEntry := range a.atomFeed.Entries {
  63. entry := model.NewEntry()
  64. // Populate the entry URL.
  65. entry.URL = atomEntry.Links.originalLink()
  66. if entry.URL != "" {
  67. if absoluteEntryURL, err := urllib.ResolveToAbsoluteURL(siteURL, entry.URL); err == nil {
  68. entry.URL = absoluteEntryURL
  69. }
  70. }
  71. // Populate the entry content.
  72. entry.Content = atomEntry.Content.body()
  73. if entry.Content == "" {
  74. entry.Content = atomEntry.Summary.body()
  75. if entry.Content == "" {
  76. entry.Content = atomEntry.FirstMediaDescription()
  77. }
  78. }
  79. // Populate the entry title.
  80. entry.Title = atomEntry.Title.title()
  81. if entry.Title == "" {
  82. entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
  83. if entry.Title == "" {
  84. entry.Title = entry.URL
  85. }
  86. }
  87. // Populate the entry author.
  88. authors := atomEntry.Authors.personNames()
  89. if len(authors) == 0 {
  90. authors = a.atomFeed.Authors.personNames()
  91. }
  92. entry.Author = strings.Join(authors, ", ")
  93. // Populate the entry date.
  94. for _, value := range []string{atomEntry.Published, atomEntry.Updated} {
  95. if value = strings.TrimSpace(value); value == "" {
  96. continue
  97. }
  98. parsedDate, err := date.Parse(value)
  99. if err != nil {
  100. slog.Debug("Unable to parse date from Atom 1.0 feed",
  101. slog.String("date", value),
  102. slog.String("url", entry.URL),
  103. slog.Any("error", err),
  104. )
  105. continue
  106. }
  107. entry.Date = parsedDate
  108. break
  109. }
  110. if entry.Date.IsZero() {
  111. entry.Date = time.Now()
  112. }
  113. // Populate categories.
  114. entry.Tags = atomEntry.Categories.CategoryNames()
  115. if len(entry.Tags) == 0 {
  116. entry.Tags = a.atomFeed.Categories.CategoryNames()
  117. }
  118. // Populate the commentsURL if defined.
  119. // See https://tools.ietf.org/html/rfc4685#section-4
  120. // If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
  121. // We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
  122. commentsURL := atomEntry.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml")
  123. if urllib.IsAbsoluteURL(commentsURL) {
  124. entry.CommentsURL = commentsURL
  125. }
  126. // Generate the entry hash.
  127. for _, value := range []string{atomEntry.ID, atomEntry.Links.originalLink()} {
  128. if value != "" {
  129. entry.Hash = crypto.SHA256(value)
  130. break
  131. }
  132. }
  133. // Populate the entry enclosures.
  134. uniqueEnclosuresMap := make(map[string]bool)
  135. for _, mediaThumbnail := range atomEntry.AllMediaThumbnails() {
  136. mediaURL := strings.TrimSpace(mediaThumbnail.URL)
  137. if mediaURL == "" {
  138. continue
  139. }
  140. if _, found := uniqueEnclosuresMap[mediaURL]; found {
  141. continue
  142. }
  143. mediaAbsoluteURL, err := urllib.ResolveToAbsoluteURL(siteURL, mediaURL)
  144. if err != nil {
  145. slog.Debug("Unable to build absolute URL for media thumbnail",
  146. slog.String("url", mediaThumbnail.URL),
  147. slog.String("site_url", siteURL),
  148. slog.Any("error", err),
  149. )
  150. continue
  151. }
  152. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  153. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  154. URL: mediaAbsoluteURL,
  155. MimeType: mediaThumbnail.MimeType(),
  156. Size: mediaThumbnail.Size(),
  157. })
  158. }
  159. for _, link := range atomEntry.Links.findAllLinksWithRelation("enclosure") {
  160. absoluteEnclosureURL, err := urllib.ResolveToAbsoluteURL(siteURL, link.Href)
  161. if err != nil {
  162. slog.Debug("Unable to resolve absolute URL for enclosure",
  163. slog.String("enclosure_url", link.Href),
  164. slog.String("entry_url", entry.URL),
  165. slog.Any("error", err),
  166. )
  167. continue
  168. }
  169. if _, found := uniqueEnclosuresMap[absoluteEnclosureURL]; found {
  170. continue
  171. }
  172. uniqueEnclosuresMap[absoluteEnclosureURL] = true
  173. length, _ := strconv.ParseInt(link.Length, 10, 0)
  174. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  175. URL: absoluteEnclosureURL,
  176. MimeType: link.Type,
  177. Size: length,
  178. })
  179. }
  180. for _, mediaContent := range atomEntry.AllMediaContents() {
  181. mediaURL := strings.TrimSpace(mediaContent.URL)
  182. if mediaURL == "" {
  183. continue
  184. }
  185. mediaAbsoluteURL, err := urllib.ResolveToAbsoluteURL(siteURL, mediaURL)
  186. if err != nil {
  187. slog.Debug("Unable to build absolute URL for media content",
  188. slog.String("url", mediaContent.URL),
  189. slog.String("site_url", siteURL),
  190. slog.Any("error", err),
  191. )
  192. continue
  193. }
  194. if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; found {
  195. continue
  196. }
  197. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  198. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  199. URL: mediaAbsoluteURL,
  200. MimeType: mediaContent.MimeType(),
  201. Size: mediaContent.Size(),
  202. })
  203. }
  204. for _, mediaPeerLink := range atomEntry.AllMediaPeerLinks() {
  205. mediaURL := strings.TrimSpace(mediaPeerLink.URL)
  206. if mediaURL == "" {
  207. continue
  208. }
  209. mediaAbsoluteURL, err := urllib.ResolveToAbsoluteURL(siteURL, mediaURL)
  210. if err != nil {
  211. slog.Debug("Unable to build absolute URL for media peer link",
  212. slog.String("url", mediaPeerLink.URL),
  213. slog.String("site_url", siteURL),
  214. slog.Any("error", err),
  215. )
  216. continue
  217. }
  218. if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; found {
  219. continue
  220. }
  221. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  222. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  223. URL: mediaAbsoluteURL,
  224. MimeType: mediaPeerLink.MimeType(),
  225. Size: mediaPeerLink.Size(),
  226. })
  227. }
  228. entries = append(entries, entry)
  229. }
  230. return entries
  231. }