atom.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package atom // import "miniflux.app/reader/atom"
  5. import (
  6. "encoding/xml"
  7. "html"
  8. "strconv"
  9. "strings"
  10. "time"
  11. "miniflux.app/crypto"
  12. "miniflux.app/logger"
  13. "miniflux.app/model"
  14. "miniflux.app/reader/date"
  15. "miniflux.app/reader/sanitizer"
  16. "miniflux.app/url"
  17. )
// atomFeed is the decoding target for the root <feed> element of a document
// in the http://www.w3.org/2005/Atom namespace (enforced through XMLName).
//
// Title is decoded as plain character data. Author is the feed-level author
// that Transform applies to entries lacking their own. Links holds every
// <link> child; the site URL and the "self" URL are picked from it by rel.
type atomFeed struct {
	XMLName xml.Name    `xml:"http://www.w3.org/2005/Atom feed"`
	ID      string      `xml:"id"`
	Title   string      `xml:"title"`
	Author  atomAuthor  `xml:"author"`
	Links   []atomLink  `xml:"link"`
	Entries []atomEntry `xml:"entry"`
}
// atomEntry is the decoding target for a single <entry> element.
//
// Published and Updated are kept as raw text and parsed later by getDate,
// which prefers Published. Content, Summary and MediaGroup are the three
// possible sources for the entry body, consulted in that order by getContent.
// MediaGroup maps the <group> element of the http://search.yahoo.com/mrss/
// namespace.
type atomEntry struct {
	ID         string         `xml:"id"`
	Title      atomContent    `xml:"title"`
	Published  string         `xml:"published"`
	Updated    string         `xml:"updated"`
	Links      []atomLink     `xml:"link"`
	Summary    atomContent    `xml:"summary"`
	Content    atomContent    `xml:"content"`
	MediaGroup atomMediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
	Author     atomAuthor     `xml:"author"`
}
// atomAuthor holds the <name> and <email> children of an <author> element.
// getAuthor prefers Name and falls back to Email.
type atomAuthor struct {
	Name  string `xml:"name"`
	Email string `xml:"email"`
}
// atomLink captures the attributes of a <link> element.
//
// URL comes from the href attribute. Rel selects the role of the link
// ("alternate", "self", "enclosure", ...). Length is kept as text exactly as
// it appears in the document; getEnclosures converts it to a number.
type atomLink struct {
	URL    string `xml:"href,attr"`
	Type   string `xml:"type,attr"`
	Rel    string `xml:"rel,attr"`
	Length string `xml:"length,attr"`
}
// atomContent is the decoding target for text-bearing elements such as
// <title>, <summary> and <content>.
//
// The same element is captured twice: Data receives its character data and
// XML receives its raw inner XML. atomContentToString chooses between the two
// according to the type attribute.
type atomContent struct {
	Type string `xml:"type,attr"`
	Data string `xml:",chardata"`
	XML  string `xml:",innerxml"`
}
// atomMediaGroup maps the <group> element of the
// http://search.yahoo.com/mrss/ namespace. Only its <description> child is
// decoded; getContent uses it when an entry has no usable content or summary.
type atomMediaGroup struct {
	Description string `xml:"http://search.yahoo.com/mrss/ description"`
}
  55. func (a *atomFeed) Transform() *model.Feed {
  56. feed := new(model.Feed)
  57. feed.FeedURL = getRelationURL(a.Links, "self")
  58. feed.SiteURL = getURL(a.Links)
  59. feed.Title = strings.TrimSpace(a.Title)
  60. if feed.Title == "" {
  61. feed.Title = feed.SiteURL
  62. }
  63. for _, entry := range a.Entries {
  64. item := entry.Transform()
  65. entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL)
  66. if err == nil {
  67. item.URL = entryURL
  68. }
  69. if item.Author == "" {
  70. item.Author = getAuthor(a.Author)
  71. }
  72. if item.Title == "" {
  73. item.Title = item.URL
  74. }
  75. feed.Entries = append(feed.Entries, item)
  76. }
  77. return feed
  78. }
  79. func (a *atomEntry) Transform() *model.Entry {
  80. entry := new(model.Entry)
  81. entry.URL = getURL(a.Links)
  82. entry.Date = getDate(a)
  83. entry.Author = getAuthor(a.Author)
  84. entry.Hash = getHash(a)
  85. entry.Content = getContent(a)
  86. entry.Title = getTitle(a)
  87. entry.Enclosures = getEnclosures(a)
  88. return entry
  89. }
  90. func getURL(links []atomLink) string {
  91. for _, link := range links {
  92. if strings.ToLower(link.Rel) == "alternate" {
  93. return strings.TrimSpace(link.URL)
  94. }
  95. if link.Rel == "" && link.Type == "" {
  96. return strings.TrimSpace(link.URL)
  97. }
  98. }
  99. return ""
  100. }
  101. func getRelationURL(links []atomLink, relation string) string {
  102. for _, link := range links {
  103. if strings.ToLower(link.Rel) == relation {
  104. return strings.TrimSpace(link.URL)
  105. }
  106. }
  107. return ""
  108. }
  109. func getDate(a *atomEntry) time.Time {
  110. // Note: The published date represents the original creation date for YouTube feeds.
  111. // Example:
  112. // <published>2019-01-26T08:02:28+00:00</published>
  113. // <updated>2019-01-29T07:27:27+00:00</updated>
  114. dateText := a.Published
  115. if dateText == "" {
  116. dateText = a.Updated
  117. }
  118. if dateText != "" {
  119. result, err := date.Parse(dateText)
  120. if err != nil {
  121. logger.Error("atom: %v", err)
  122. return time.Now()
  123. }
  124. return result
  125. }
  126. return time.Now()
  127. }
  128. func atomContentToString(c atomContent) string {
  129. if c.Type == "xhtml" {
  130. return c.XML
  131. }
  132. if c.Type == "html" {
  133. return c.Data
  134. }
  135. if c.Type == "text" || c.Type == "" {
  136. return html.EscapeString(c.Data)
  137. }
  138. return ""
  139. }
  140. func getContent(a *atomEntry) string {
  141. r := atomContentToString(a.Content)
  142. if r != "" {
  143. return r
  144. }
  145. r = atomContentToString(a.Summary)
  146. if r != "" {
  147. return r
  148. }
  149. if a.MediaGroup.Description != "" {
  150. return a.MediaGroup.Description
  151. }
  152. return ""
  153. }
  154. func getTitle(a *atomEntry) string {
  155. title := atomContentToString(a.Title)
  156. return strings.TrimSpace(sanitizer.StripTags(title))
  157. }
  158. func getHash(a *atomEntry) string {
  159. for _, value := range []string{a.ID, getURL(a.Links)} {
  160. if value != "" {
  161. return crypto.Hash(value)
  162. }
  163. }
  164. return ""
  165. }
  166. func getEnclosures(a *atomEntry) model.EnclosureList {
  167. enclosures := make(model.EnclosureList, 0)
  168. for _, link := range a.Links {
  169. if strings.ToLower(link.Rel) == "enclosure" {
  170. length, _ := strconv.ParseInt(link.Length, 10, 0)
  171. enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length})
  172. }
  173. }
  174. return enclosures
  175. }
  176. func getAuthor(author atomAuthor) string {
  177. if author.Name != "" {
  178. return strings.TrimSpace(author.Name)
  179. }
  180. if author.Email != "" {
  181. return strings.TrimSpace(author.Email)
  182. }
  183. return ""
  184. }