|
|
@@ -21,20 +21,25 @@ import (
|
|
|
"miniflux.app/v2/internal/urllib"
|
|
|
)
|
|
|
|
|
|
-// Specs: https://cyber.harvard.edu/rss/rss.html
|
|
|
+// Specs: https://www.rssboard.org/rss-specification
|
|
|
type rssFeed struct {
|
|
|
- XMLName xml.Name `xml:"rss"`
|
|
|
- Version string `xml:"version,attr"`
|
|
|
- Title string `xml:"channel>title"`
|
|
|
- Links []rssLink `xml:"channel>link"`
|
|
|
- ImageURL string `xml:"channel>image>url"`
|
|
|
- Language string `xml:"channel>language"`
|
|
|
- Description string `xml:"channel>description"`
|
|
|
- PubDate string `xml:"channel>pubDate"`
|
|
|
- ManagingEditor string `xml:"channel>managingEditor"`
|
|
|
- Webmaster string `xml:"channel>webMaster"`
|
|
|
- TimeToLive rssTTL `xml:"channel>ttl"`
|
|
|
- Items []rssItem `xml:"channel>item"`
|
|
|
+ XMLName xml.Name `xml:"rss"`
|
|
|
+ Version string `xml:"rss version,attr"`
|
|
|
+ Channel rssChannel `xml:"rss channel"`
|
|
|
+}
|
|
|
+
|
|
|
+type rssChannel struct {
|
|
|
+ Title string `xml:"rss title"`
|
|
|
+ Link string `xml:"rss link"`
|
|
|
+ ImageURL string `xml:"rss image>url"`
|
|
|
+ Language string `xml:"rss language"`
|
|
|
+ Description string `xml:"rss description"`
|
|
|
+ PubDate string `xml:"rss pubDate"`
|
|
|
+ ManagingEditor string `xml:"rss managingEditor"`
|
|
|
+ Webmaster string `xml:"rss webMaster"`
|
|
|
+ TimeToLive rssTTL `xml:"rss ttl"`
|
|
|
+ Items []rssItem `xml:"rss item"`
|
|
|
+ AtomLinks
|
|
|
PodcastFeedElement
|
|
|
}
|
|
|
|
|
|
@@ -72,15 +77,15 @@ func (r *rssFeed) Transform(baseURL string) *model.Feed {
|
|
|
feed.FeedURL = feedURL
|
|
|
}
|
|
|
|
|
|
- feed.Title = html.UnescapeString(strings.TrimSpace(r.Title))
|
|
|
+ feed.Title = html.UnescapeString(strings.TrimSpace(r.Channel.Title))
|
|
|
if feed.Title == "" {
|
|
|
feed.Title = feed.SiteURL
|
|
|
}
|
|
|
|
|
|
- feed.IconURL = strings.TrimSpace(r.ImageURL)
|
|
|
- feed.TTL = r.TimeToLive.Value()
|
|
|
+ feed.IconURL = strings.TrimSpace(r.Channel.ImageURL)
|
|
|
+ feed.TTL = r.Channel.TimeToLive.Value()
|
|
|
|
|
|
- for _, item := range r.Items {
|
|
|
+ for _, item := range r.Channel.Items {
|
|
|
entry := item.Transform()
|
|
|
if entry.Author == "" {
|
|
|
entry.Author = r.feedAuthor()
|
|
|
@@ -110,32 +115,29 @@ func (r *rssFeed) Transform(baseURL string) *model.Feed {
|
|
|
}
|
|
|
|
|
|
func (r *rssFeed) siteURL() string {
|
|
|
- for _, element := range r.Links {
|
|
|
- if element.XMLName.Space == "" {
|
|
|
- return strings.TrimSpace(element.Data)
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- return ""
|
|
|
+ return strings.TrimSpace(r.Channel.Link)
|
|
|
}
|
|
|
|
|
|
func (r *rssFeed) feedURL() string {
|
|
|
- for _, element := range r.Links {
|
|
|
- if element.XMLName.Space == "http://www.w3.org/2005/Atom" {
|
|
|
- return strings.TrimSpace(element.Href)
|
|
|
+ for _, atomLink := range r.Channel.AtomLinks.Links {
|
|
|
+ if atomLink.Rel == "self" {
|
|
|
+ return strings.TrimSpace(atomLink.URL)
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
return ""
|
|
|
}
|
|
|
|
|
|
func (r rssFeed) feedAuthor() string {
|
|
|
- author := r.PodcastAuthor()
|
|
|
+ author := r.Channel.PodcastAuthor()
|
|
|
switch {
|
|
|
- case r.ManagingEditor != "":
|
|
|
- author = r.ManagingEditor
|
|
|
- case r.Webmaster != "":
|
|
|
- author = r.Webmaster
|
|
|
+ case r.Channel.ManagingEditor != "":
|
|
|
+ author = r.Channel.ManagingEditor
|
|
|
+ case r.Channel.Webmaster != "":
|
|
|
+ author = r.Channel.Webmaster
|
|
|
+ case r.Channel.GooglePlayAuthor != "":
|
|
|
+ author = r.Channel.GooglePlayAuthor
|
|
|
+ case r.Channel.PodcastOwner.String() != "":
|
|
|
+ author = r.Channel.PodcastOwner.String()
|
|
|
}
|
|
|
return sanitizer.StripTags(strings.TrimSpace(author))
|
|
|
}
|
|
|
@@ -146,27 +148,7 @@ type rssGUID struct {
|
|
|
IsPermaLink string `xml:"isPermaLink,attr"`
|
|
|
}
|
|
|
|
|
|
-type rssLink struct {
|
|
|
- XMLName xml.Name
|
|
|
- Data string `xml:",chardata"`
|
|
|
- Href string `xml:"href,attr"`
|
|
|
- Rel string `xml:"rel,attr"`
|
|
|
-}
|
|
|
-
|
|
|
-type rssCommentLink struct {
|
|
|
- XMLName xml.Name
|
|
|
- Data string `xml:",chardata"`
|
|
|
-}
|
|
|
-
|
|
|
type rssAuthor struct {
|
|
|
- XMLName xml.Name
|
|
|
- Data string `xml:",chardata"`
|
|
|
- Name string `xml:"name"`
|
|
|
- Email string `xml:"email"`
|
|
|
- Inner string `xml:",innerxml"`
|
|
|
-}
|
|
|
-
|
|
|
-type rssTitle struct {
|
|
|
XMLName xml.Name
|
|
|
Data string `xml:",chardata"`
|
|
|
Inner string `xml:",innerxml"`
|
|
|
@@ -193,19 +175,21 @@ func (enclosure *rssEnclosure) Size() int64 {
|
|
|
}
|
|
|
|
|
|
type rssItem struct {
|
|
|
- GUID rssGUID `xml:"guid"`
|
|
|
- Title []rssTitle `xml:"title"`
|
|
|
- Links []rssLink `xml:"link"`
|
|
|
- Description string `xml:"description"`
|
|
|
- PubDate string `xml:"pubDate"`
|
|
|
- Authors []rssAuthor `xml:"author"`
|
|
|
- CommentLinks []rssCommentLink `xml:"comments"`
|
|
|
- EnclosureLinks []rssEnclosure `xml:"enclosure"`
|
|
|
- Categories []rssCategory `xml:"category"`
|
|
|
+ GUID rssGUID `xml:"rss guid"`
|
|
|
+ Title string `xml:"rss title"`
|
|
|
+ Link string `xml:"rss link"`
|
|
|
+ Description string `xml:"rss description"`
|
|
|
+ PubDate string `xml:"rss pubDate"`
|
|
|
+ Author rssAuthor `xml:"rss author"`
|
|
|
+ Comments string `xml:"rss comments"`
|
|
|
+ EnclosureLinks []rssEnclosure `xml:"rss enclosure"`
|
|
|
+ Categories []rssCategory `xml:"rss category"`
|
|
|
dublincore.DublinCoreItemElement
|
|
|
FeedBurnerElement
|
|
|
PodcastEntryElement
|
|
|
media.Element
|
|
|
+ AtomAuthor
|
|
|
+ AtomLinks
|
|
|
}
|
|
|
|
|
|
func (r *rssItem) Transform() *model.Entry {
|
|
|
@@ -250,34 +234,26 @@ func (r *rssItem) entryDate() time.Time {
|
|
|
}
|
|
|
|
|
|
func (r *rssItem) entryAuthor() string {
|
|
|
- author := ""
|
|
|
-
|
|
|
- for _, rssAuthor := range r.Authors {
|
|
|
- switch rssAuthor.XMLName.Space {
|
|
|
- case "http://www.itunes.com/dtds/podcast-1.0.dtd", "http://www.google.com/schemas/play-podcasts/1.0":
|
|
|
- author = rssAuthor.Data
|
|
|
- case "http://www.w3.org/2005/Atom":
|
|
|
- if rssAuthor.Name != "" {
|
|
|
- author = rssAuthor.Name
|
|
|
- } else if rssAuthor.Email != "" {
|
|
|
- author = rssAuthor.Email
|
|
|
- }
|
|
|
- default:
|
|
|
- if rssAuthor.Name != "" {
|
|
|
- author = rssAuthor.Name
|
|
|
- } else if strings.Contains(rssAuthor.Inner, "<![CDATA[") {
|
|
|
- author = rssAuthor.Data
|
|
|
- } else {
|
|
|
- author = rssAuthor.Inner
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ var author string
|
|
|
|
|
|
- if author == "" {
|
|
|
- author = r.GetSanitizedCreator()
|
|
|
+ switch {
|
|
|
+ case r.PodcastOwner.String() != "":
|
|
|
+ author = r.PodcastOwner.String()
|
|
|
+ case r.GooglePlayAuthor != "":
|
|
|
+ author = r.GooglePlayAuthor
|
|
|
+ case r.ItunesAuthor != "":
|
|
|
+ author = r.ItunesAuthor
|
|
|
+ case r.DublinCoreCreator != "":
|
|
|
+ author = r.DublinCoreCreator
|
|
|
+ case r.AtomAuthor.String() != "":
|
|
|
+ author = r.AtomAuthor.String()
|
|
|
+ case strings.Contains(r.Author.Inner, "<![CDATA["):
|
|
|
+ author = r.Author.Data
|
|
|
+ default:
|
|
|
+ author = r.Author.Inner
|
|
|
}
|
|
|
|
|
|
- return sanitizer.StripTags(strings.TrimSpace(author))
|
|
|
+ return strings.TrimSpace(sanitizer.StripTags(author))
|
|
|
}
|
|
|
|
|
|
func (r *rssItem) entryHash() string {
|
|
|
@@ -291,21 +267,10 @@ func (r *rssItem) entryHash() string {
|
|
|
}
|
|
|
|
|
|
func (r *rssItem) entryTitle() string {
|
|
|
- var title string
|
|
|
-
|
|
|
- for _, rssTitle := range r.Title {
|
|
|
- switch rssTitle.XMLName.Space {
|
|
|
- case "http://search.yahoo.com/mrss/":
|
|
|
- // Ignore title in media namespace
|
|
|
- case "http://purl.org/dc/elements/1.1/":
|
|
|
- title = rssTitle.Data
|
|
|
- default:
|
|
|
- title = rssTitle.Data
|
|
|
- }
|
|
|
+ title := r.Title
|
|
|
|
|
|
- if title != "" {
|
|
|
- break
|
|
|
- }
|
|
|
+ if r.DublinCoreTitle != "" {
|
|
|
+ title = r.DublinCoreTitle
|
|
|
}
|
|
|
|
|
|
return html.UnescapeString(strings.TrimSpace(title))
|
|
|
@@ -321,17 +286,15 @@ func (r *rssItem) entryContent() string {
|
|
|
}
|
|
|
|
|
|
func (r *rssItem) entryURL() string {
|
|
|
- if r.FeedBurnerLink != "" {
|
|
|
- return r.FeedBurnerLink
|
|
|
- }
|
|
|
-
|
|
|
- for _, link := range r.Links {
|
|
|
- if link.XMLName.Space == "http://www.w3.org/2005/Atom" && link.Href != "" && isValidLinkRelation(link.Rel) {
|
|
|
- return strings.TrimSpace(link.Href)
|
|
|
+ for _, link := range []string{r.FeedBurnerLink, r.Link} {
|
|
|
+ if link != "" {
|
|
|
+ return strings.TrimSpace(link)
|
|
|
}
|
|
|
+ }
|
|
|
|
|
|
- if link.Data != "" {
|
|
|
- return strings.TrimSpace(link.Data)
|
|
|
+ for _, atomLink := range r.AtomLinks.Links {
|
|
|
+ if atomLink.URL != "" && (strings.EqualFold(atomLink.Rel, "alternate") || atomLink.Rel == "") {
|
|
|
+ return strings.TrimSpace(atomLink.URL)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -425,28 +388,10 @@ func (r *rssItem) entryCategories() []string {
|
|
|
}
|
|
|
|
|
|
func (r *rssItem) entryCommentsURL() string {
|
|
|
- for _, commentLink := range r.CommentLinks {
|
|
|
- if commentLink.XMLName.Space == "" {
|
|
|
- commentsURL := strings.TrimSpace(commentLink.Data)
|
|
|
- // The comments URL is supposed to be absolute (some feeds publishes incorrect comments URL)
|
|
|
- // See https://cyber.harvard.edu/rss/rss.html#ltcommentsgtSubelementOfLtitemgt
|
|
|
- if urllib.IsAbsoluteURL(commentsURL) {
|
|
|
- return commentsURL
|
|
|
- }
|
|
|
- }
|
|
|
+ commentsURL := strings.TrimSpace(r.Comments)
|
|
|
+ if commentsURL != "" && urllib.IsAbsoluteURL(commentsURL) {
|
|
|
+ return commentsURL
|
|
|
}
|
|
|
|
|
|
return ""
|
|
|
}
|
|
|
-
|
|
|
-func isValidLinkRelation(rel string) bool {
|
|
|
- switch rel {
|
|
|
- case "", "alternate", "enclosure", "related", "self", "via":
|
|
|
- return true
|
|
|
- default:
|
|
|
- if strings.HasPrefix(rel, "http") {
|
|
|
- return true
|
|
|
- }
|
|
|
- return false
|
|
|
- }
|
|
|
-}
|