atom.go 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package atom
  5. import (
  6. "encoding/xml"
  7. "strconv"
  8. "strings"
  9. "time"
  10. "github.com/miniflux/miniflux/crypto"
  11. "github.com/miniflux/miniflux/logger"
  12. "github.com/miniflux/miniflux/model"
  13. "github.com/miniflux/miniflux/reader/date"
  14. "github.com/miniflux/miniflux/reader/sanitizer"
  15. "github.com/miniflux/miniflux/url"
  16. )
  17. type atomFeed struct {
  18. XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"`
  19. ID string `xml:"id"`
  20. Title string `xml:"title"`
  21. Author atomAuthor `xml:"author"`
  22. Links []atomLink `xml:"link"`
  23. Entries []atomEntry `xml:"entry"`
  24. }
  25. type atomEntry struct {
  26. ID string `xml:"id"`
  27. Title atomContent `xml:"title"`
  28. Published string `xml:"published"`
  29. Updated string `xml:"updated"`
  30. Links []atomLink `xml:"link"`
  31. Summary string `xml:"summary"`
  32. Content atomContent `xml:"content"`
  33. MediaGroup atomMediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
  34. Author atomAuthor `xml:"author"`
  35. }
  // atomAuthor is the decoding target for an <author> element.
  type atomAuthor struct {
  	// Name is the text of the <name> child.
  	Name string `xml:"name"`

  	// Email is the text of the <email> child.
  	Email string `xml:"email"`
  }
  // atomLink is the decoding target for a <link> element. All values come
  // from attributes and are stored exactly as written in the document.
  type atomLink struct {
  	// URL is the href attribute.
  	URL string `xml:"href,attr"`

  	// Type is the type attribute.
  	Type string `xml:"type,attr"`

  	// Rel is the rel attribute.
  	Rel string `xml:"rel,attr"`

  	// Length is the length attribute, kept as text; getEnclosures converts
  	// it to a number.
  	Length string `xml:"length,attr"`
  }
  // atomContent is the decoding target for a text-bearing element such as
  // <title> or <content>. The same element body is captured twice so callers
  // can choose the representation that fits the declared type.
  type atomContent struct {
  	// Type is the type attribute of the element.
  	Type string `xml:"type,attr"`

  	// Data is the element's character data.
  	Data string `xml:",chardata"`

  	// XML is the element's raw inner XML, markup included.
  	XML string `xml:",innerxml"`
  }
  // atomMediaGroup is the decoding target for a <group> element of the
  // http://search.yahoo.com/mrss/ namespace.
  type atomMediaGroup struct {
  	// Description is the text of the <description> child in the same
  	// namespace.
  	Description string `xml:"http://search.yahoo.com/mrss/ description"`
  }
  54. func (a *atomFeed) Transform() *model.Feed {
  55. feed := new(model.Feed)
  56. feed.FeedURL = getRelationURL(a.Links, "self")
  57. feed.SiteURL = getURL(a.Links)
  58. feed.Title = strings.TrimSpace(a.Title)
  59. if feed.Title == "" {
  60. feed.Title = feed.SiteURL
  61. }
  62. for _, entry := range a.Entries {
  63. item := entry.Transform()
  64. entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL)
  65. if err == nil {
  66. item.URL = entryURL
  67. }
  68. if item.Author == "" {
  69. item.Author = getAuthor(a.Author)
  70. }
  71. if item.Title == "" {
  72. item.Title = item.URL
  73. }
  74. feed.Entries = append(feed.Entries, item)
  75. }
  76. return feed
  77. }
  78. func (a *atomEntry) Transform() *model.Entry {
  79. entry := new(model.Entry)
  80. entry.URL = getURL(a.Links)
  81. entry.Date = getDate(a)
  82. entry.Author = getAuthor(a.Author)
  83. entry.Hash = getHash(a)
  84. entry.Content = getContent(a)
  85. entry.Title = getTitle(a)
  86. entry.Enclosures = getEnclosures(a)
  87. return entry
  88. }
  89. func getURL(links []atomLink) string {
  90. for _, link := range links {
  91. if strings.ToLower(link.Rel) == "alternate" {
  92. return strings.TrimSpace(link.URL)
  93. }
  94. if link.Rel == "" && link.Type == "" {
  95. return strings.TrimSpace(link.URL)
  96. }
  97. }
  98. return ""
  99. }
  100. func getRelationURL(links []atomLink, relation string) string {
  101. for _, link := range links {
  102. if strings.ToLower(link.Rel) == relation {
  103. return strings.TrimSpace(link.URL)
  104. }
  105. }
  106. return ""
  107. }
  108. func getDate(a *atomEntry) time.Time {
  109. dateText := a.Updated
  110. if dateText == "" {
  111. dateText = a.Published
  112. }
  113. if dateText != "" {
  114. result, err := date.Parse(dateText)
  115. if err != nil {
  116. logger.Error("atom: %v", err)
  117. return time.Now()
  118. }
  119. return result
  120. }
  121. return time.Now()
  122. }
  123. func getContent(a *atomEntry) string {
  124. if a.Content.Type == "html" || a.Content.Type == "text" {
  125. return a.Content.Data
  126. }
  127. if a.Content.Type == "xhtml" {
  128. return a.Content.XML
  129. }
  130. if a.Summary != "" {
  131. return a.Summary
  132. }
  133. if a.MediaGroup.Description != "" {
  134. return a.MediaGroup.Description
  135. }
  136. return ""
  137. }
  138. func getTitle(a *atomEntry) string {
  139. title := ""
  140. if a.Title.Type == "xhtml" {
  141. title = a.Title.XML
  142. } else {
  143. title = a.Title.Data
  144. }
  145. return strings.TrimSpace(sanitizer.StripTags(title))
  146. }
  147. func getHash(a *atomEntry) string {
  148. for _, value := range []string{a.ID, getURL(a.Links)} {
  149. if value != "" {
  150. return crypto.Hash(value)
  151. }
  152. }
  153. return ""
  154. }
  155. func getEnclosures(a *atomEntry) model.EnclosureList {
  156. enclosures := make(model.EnclosureList, 0)
  157. for _, link := range a.Links {
  158. if strings.ToLower(link.Rel) == "enclosure" {
  159. length, _ := strconv.ParseInt(link.Length, 10, 0)
  160. enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length})
  161. }
  162. }
  163. return enclosures
  164. }
  165. func getAuthor(author atomAuthor) string {
  166. if author.Name != "" {
  167. return strings.TrimSpace(author.Name)
  168. }
  169. if author.Email != "" {
  170. return strings.TrimSpace(author.Email)
  171. }
  172. return ""
  173. }