4
0

atom_10_adapter.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package atom // import "miniflux.app/v2/internal/reader/atom"
  4. import (
  5. "log/slog"
  6. "strconv"
  7. "strings"
  8. "time"
  9. "miniflux.app/v2/internal/crypto"
  10. "miniflux.app/v2/internal/model"
  11. "miniflux.app/v2/internal/reader/date"
  12. "miniflux.app/v2/internal/reader/sanitizer"
  13. "miniflux.app/v2/internal/urllib"
  14. )
  15. type atom10Adapter struct {
  16. atomFeed *atom10Feed
  17. }
  18. func (a *atom10Adapter) buildFeed(baseURL string) *model.Feed {
  19. feed := &model.Feed{
  20. FeedURL: baseURL,
  21. SiteURL: baseURL,
  22. }
  23. // Populate the feed URL.
  24. feedURL := a.atomFeed.Links.firstLinkWithRelation("self")
  25. if feedURL != "" {
  26. if absoluteFeedURL, err := urllib.ResolveToAbsoluteURL(baseURL, feedURL); err == nil {
  27. feed.FeedURL = absoluteFeedURL
  28. }
  29. }
  30. // Populate the site URL.
  31. siteURL := a.atomFeed.Links.originalLink()
  32. if siteURL != "" {
  33. if absoluteSiteURL, err := urllib.ResolveToAbsoluteURL(baseURL, siteURL); err == nil {
  34. feed.SiteURL = absoluteSiteURL
  35. }
  36. }
  37. // Populate the feed title.
  38. feed.Title = a.atomFeed.Title.body()
  39. if feed.Title == "" {
  40. feed.Title = feed.SiteURL
  41. }
  42. // Populate the feed description.
  43. feed.Description = a.atomFeed.Subtitle.body()
  44. // Populate the feed icon.
  45. for _, value := range []string{a.atomFeed.Icon, a.atomFeed.Logo} {
  46. if value = strings.TrimSpace(value); value == "" {
  47. continue
  48. }
  49. if iconURL, err := urllib.ResolveToAbsoluteURL(feed.SiteURL, value); err == nil {
  50. feed.IconURL = iconURL
  51. break
  52. }
  53. }
  54. feed.Entries = a.populateEntries(feed.SiteURL)
  55. return feed
  56. }
  57. func (a *atom10Adapter) populateEntries(siteURL string) model.Entries {
  58. entries := make(model.Entries, 0, len(a.atomFeed.Entries))
  59. for _, atomEntry := range a.atomFeed.Entries {
  60. entry := model.NewEntry()
  61. // Populate the entry URL.
  62. entry.URL = atomEntry.Links.originalLink()
  63. if entry.URL != "" {
  64. if absoluteEntryURL, err := urllib.ResolveToAbsoluteURL(siteURL, entry.URL); err == nil {
  65. entry.URL = absoluteEntryURL
  66. }
  67. }
  68. // If the entry has no links, attempt to use its ID as a URL
  69. // and if that fails, use the site URL.
  70. if entry.URL == "" {
  71. if urllib.IsAbsoluteURL(atomEntry.ID) {
  72. entry.URL = atomEntry.ID
  73. } else {
  74. entry.URL = siteURL
  75. }
  76. }
  77. // Populate the entry content.
  78. entry.Content = atomEntry.Content.body()
  79. if entry.Content == "" {
  80. entry.Content = atomEntry.Summary.body()
  81. if entry.Content == "" {
  82. entry.Content = atomEntry.FirstMediaDescription()
  83. }
  84. }
  85. // Populate the entry title.
  86. entry.Title = atomEntry.Title.title()
  87. if entry.Title == "" {
  88. entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
  89. if entry.Title == "" {
  90. entry.Title = entry.URL
  91. }
  92. }
  93. // Populate the entry author.
  94. authors := atomEntry.Authors.personNames()
  95. if len(authors) == 0 {
  96. authors = a.atomFeed.Authors.personNames()
  97. }
  98. entry.Author = strings.Join(authors, ", ")
  99. // Populate the entry date.
  100. for _, value := range []string{atomEntry.Published, atomEntry.Updated} {
  101. if value = strings.TrimSpace(value); value == "" {
  102. continue
  103. }
  104. parsedDate, err := date.Parse(value)
  105. if err != nil {
  106. slog.Debug("Unable to parse date from Atom 1.0 feed",
  107. slog.String("date", value),
  108. slog.String("url", entry.URL),
  109. slog.Any("error", err),
  110. )
  111. continue
  112. }
  113. entry.Date = parsedDate
  114. break
  115. }
  116. if entry.Date.IsZero() {
  117. entry.Date = time.Now()
  118. }
  119. // Populate categories.
  120. entry.Tags = atomEntry.Categories.CategoryNames()
  121. if len(entry.Tags) == 0 {
  122. entry.Tags = a.atomFeed.Categories.CategoryNames()
  123. }
  124. // Populate the commentsURL if defined.
  125. // See https://tools.ietf.org/html/rfc4685#section-4
  126. // If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
  127. // We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
  128. commentsURL := atomEntry.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml")
  129. if urllib.IsAbsoluteURL(commentsURL) {
  130. entry.CommentsURL = commentsURL
  131. }
  132. // Generate the entry hash.
  133. for _, value := range []string{atomEntry.ID, atomEntry.Links.originalLink()} {
  134. if value != "" {
  135. entry.Hash = crypto.SHA256(value)
  136. break
  137. }
  138. }
  139. // Populate the entry enclosures.
  140. uniqueEnclosuresMap := make(map[string]bool)
  141. for _, mediaThumbnail := range atomEntry.AllMediaThumbnails() {
  142. mediaURL := strings.TrimSpace(mediaThumbnail.URL)
  143. if mediaURL == "" {
  144. continue
  145. }
  146. if _, found := uniqueEnclosuresMap[mediaURL]; found {
  147. continue
  148. }
  149. mediaAbsoluteURL, err := urllib.ResolveToAbsoluteURL(siteURL, mediaURL)
  150. if err != nil {
  151. slog.Debug("Unable to build absolute URL for media thumbnail",
  152. slog.String("url", mediaThumbnail.URL),
  153. slog.String("site_url", siteURL),
  154. slog.Any("error", err),
  155. )
  156. continue
  157. }
  158. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  159. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  160. URL: mediaAbsoluteURL,
  161. MimeType: mediaThumbnail.MimeType(),
  162. Size: mediaThumbnail.Size(),
  163. })
  164. }
  165. for _, link := range atomEntry.Links.findAllLinksWithRelation("enclosure") {
  166. absoluteEnclosureURL, err := urllib.ResolveToAbsoluteURL(siteURL, link.Href)
  167. if err != nil {
  168. slog.Debug("Unable to resolve absolute URL for enclosure",
  169. slog.String("enclosure_url", link.Href),
  170. slog.String("entry_url", entry.URL),
  171. slog.Any("error", err),
  172. )
  173. continue
  174. }
  175. if _, found := uniqueEnclosuresMap[absoluteEnclosureURL]; found {
  176. continue
  177. }
  178. uniqueEnclosuresMap[absoluteEnclosureURL] = true
  179. length, _ := strconv.ParseInt(link.Length, 10, 0)
  180. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  181. URL: absoluteEnclosureURL,
  182. MimeType: link.Type,
  183. Size: length,
  184. })
  185. }
  186. for _, mediaContent := range atomEntry.AllMediaContents() {
  187. mediaURL := strings.TrimSpace(mediaContent.URL)
  188. if mediaURL == "" {
  189. continue
  190. }
  191. mediaAbsoluteURL, err := urllib.ResolveToAbsoluteURL(siteURL, mediaURL)
  192. if err != nil {
  193. slog.Debug("Unable to build absolute URL for media content",
  194. slog.String("url", mediaContent.URL),
  195. slog.String("site_url", siteURL),
  196. slog.Any("error", err),
  197. )
  198. continue
  199. }
  200. if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; found {
  201. continue
  202. }
  203. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  204. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  205. URL: mediaAbsoluteURL,
  206. MimeType: mediaContent.MimeType(),
  207. Size: mediaContent.Size(),
  208. })
  209. }
  210. for _, mediaPeerLink := range atomEntry.AllMediaPeerLinks() {
  211. mediaURL := strings.TrimSpace(mediaPeerLink.URL)
  212. if mediaURL == "" {
  213. continue
  214. }
  215. mediaAbsoluteURL, err := urllib.ResolveToAbsoluteURL(siteURL, mediaURL)
  216. if err != nil {
  217. slog.Debug("Unable to build absolute URL for media peer link",
  218. slog.String("url", mediaPeerLink.URL),
  219. slog.String("site_url", siteURL),
  220. slog.Any("error", err),
  221. )
  222. continue
  223. }
  224. if _, found := uniqueEnclosuresMap[mediaAbsoluteURL]; found {
  225. continue
  226. }
  227. uniqueEnclosuresMap[mediaAbsoluteURL] = true
  228. entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
  229. URL: mediaAbsoluteURL,
  230. MimeType: mediaPeerLink.MimeType(),
  231. Size: mediaPeerLink.Size(),
  232. })
  233. }
  234. entries = append(entries, entry)
  235. }
  236. return entries
  237. }