rss.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package rss
  5. import (
  6. "encoding/xml"
  7. "path"
  8. "strconv"
  9. "strings"
  10. "time"
  11. "github.com/miniflux/miniflux/helper"
  12. "github.com/miniflux/miniflux/logger"
  13. "github.com/miniflux/miniflux/model"
  14. "github.com/miniflux/miniflux/reader/date"
  15. "github.com/miniflux/miniflux/url"
  16. )
  17. type rssFeed struct {
  18. XMLName xml.Name `xml:"rss"`
  19. Version string `xml:"version,attr"`
  20. Title string `xml:"channel>title"`
  21. Links []rssLink `xml:"channel>link"`
  22. Language string `xml:"channel>language"`
  23. Description string `xml:"channel>description"`
  24. PubDate string `xml:"channel>pubDate"`
  25. ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>author"`
  26. Items []rssItem `xml:"channel>item"`
  27. }
  28. type rssLink struct {
  29. XMLName xml.Name
  30. Data string `xml:",chardata"`
  31. Href string `xml:"href,attr"`
  32. Rel string `xml:"rel,attr"`
  33. }
  34. type rssItem struct {
  35. GUID string `xml:"guid"`
  36. Title string `xml:"title"`
  37. Links []rssLink `xml:"link"`
  38. OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
  39. Description string `xml:"description"`
  40. Content string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
  41. PubDate string `xml:"pubDate"`
  42. Date string `xml:"http://purl.org/dc/elements/1.1/ date"`
  43. Authors []rssAuthor `xml:"author"`
  44. Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
  45. Enclosures []rssEnclosure `xml:"enclosure"`
  46. OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
  47. }
  48. type rssAuthor struct {
  49. XMLName xml.Name
  50. Data string `xml:",chardata"`
  51. Name string `xml:"name"`
  52. }
  53. type rssEnclosure struct {
  54. URL string `xml:"url,attr"`
  55. Type string `xml:"type,attr"`
  56. Length string `xml:"length,attr"`
  57. }
  58. func (r *rssFeed) GetSiteURL() string {
  59. for _, element := range r.Links {
  60. if element.XMLName.Space == "" {
  61. return strings.TrimSpace(element.Data)
  62. }
  63. }
  64. return ""
  65. }
  66. func (r *rssFeed) GetFeedURL() string {
  67. for _, element := range r.Links {
  68. if element.XMLName.Space == "http://www.w3.org/2005/Atom" {
  69. return strings.TrimSpace(element.Href)
  70. }
  71. }
  72. return ""
  73. }
  74. func (r *rssFeed) Transform() *model.Feed {
  75. feed := new(model.Feed)
  76. feed.SiteURL = r.GetSiteURL()
  77. feed.FeedURL = r.GetFeedURL()
  78. feed.Title = strings.TrimSpace(r.Title)
  79. if feed.Title == "" {
  80. feed.Title = feed.SiteURL
  81. }
  82. for _, item := range r.Items {
  83. entry := item.Transform()
  84. if entry.Author == "" && r.ItunesAuthor != "" {
  85. entry.Author = r.ItunesAuthor
  86. }
  87. entry.Author = strings.TrimSpace(entry.Author)
  88. if entry.URL == "" {
  89. entry.URL = feed.SiteURL
  90. } else {
  91. entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL)
  92. if err == nil {
  93. entry.URL = entryURL
  94. }
  95. }
  96. if entry.Title == "" {
  97. entry.Title = entry.URL
  98. }
  99. feed.Entries = append(feed.Entries, entry)
  100. }
  101. return feed
  102. }
  103. func (r *rssItem) GetDate() time.Time {
  104. value := r.PubDate
  105. if r.Date != "" {
  106. value = r.Date
  107. }
  108. if value != "" {
  109. result, err := date.Parse(value)
  110. if err != nil {
  111. logger.Error("rss: %v", err)
  112. return time.Now()
  113. }
  114. return result
  115. }
  116. return time.Now()
  117. }
  118. func (r *rssItem) GetAuthor() string {
  119. for _, element := range r.Authors {
  120. if element.Name != "" {
  121. return element.Name
  122. }
  123. if element.Data != "" {
  124. return element.Data
  125. }
  126. }
  127. return r.Creator
  128. }
  129. func (r *rssItem) GetHash() string {
  130. for _, value := range []string{r.GUID, r.GetURL()} {
  131. if value != "" {
  132. return helper.Hash(value)
  133. }
  134. }
  135. return ""
  136. }
  137. func (r *rssItem) GetContent() string {
  138. if r.Content != "" {
  139. return r.Content
  140. }
  141. return r.Description
  142. }
  143. func (r *rssItem) GetURL() string {
  144. if r.OriginalLink != "" {
  145. return r.OriginalLink
  146. }
  147. for _, link := range r.Links {
  148. if link.XMLName.Space == "http://www.w3.org/2005/Atom" && link.Href != "" && isValidLinkRelation(link.Rel) {
  149. return strings.TrimSpace(link.Href)
  150. }
  151. if link.Data != "" {
  152. return strings.TrimSpace(link.Data)
  153. }
  154. }
  155. return ""
  156. }
  157. func (r *rssItem) GetEnclosures() model.EnclosureList {
  158. enclosures := make(model.EnclosureList, 0)
  159. for _, enclosure := range r.Enclosures {
  160. length, _ := strconv.Atoi(enclosure.Length)
  161. enclosureURL := enclosure.URL
  162. if r.OrigEnclosureLink != "" {
  163. filename := path.Base(r.OrigEnclosureLink)
  164. if strings.Contains(enclosureURL, filename) {
  165. enclosureURL = r.OrigEnclosureLink
  166. }
  167. }
  168. enclosures = append(enclosures, &model.Enclosure{
  169. URL: enclosureURL,
  170. MimeType: enclosure.Type,
  171. Size: length,
  172. })
  173. }
  174. return enclosures
  175. }
  176. func (r *rssItem) Transform() *model.Entry {
  177. entry := new(model.Entry)
  178. entry.URL = r.GetURL()
  179. entry.Date = r.GetDate()
  180. entry.Author = r.GetAuthor()
  181. entry.Hash = r.GetHash()
  182. entry.Content = r.GetContent()
  183. entry.Title = strings.TrimSpace(r.Title)
  184. entry.Enclosures = r.GetEnclosures()
  185. return entry
  186. }
  187. func isValidLinkRelation(rel string) bool {
  188. switch rel {
  189. case "", "alternate", "enclosure", "related", "self", "via":
  190. return true
  191. default:
  192. if strings.HasPrefix(rel, "http") {
  193. return true
  194. }
  195. return false
  196. }
  197. }