|
|
@@ -5,391 +5,110 @@ package rss // import "miniflux.app/v2/internal/reader/rss"
|
|
|
|
|
|
import (
|
|
|
"encoding/xml"
|
|
|
- "html"
|
|
|
- "log/slog"
|
|
|
- "path"
|
|
|
"strconv"
|
|
|
"strings"
|
|
|
- "time"
|
|
|
|
|
|
- "miniflux.app/v2/internal/crypto"
|
|
|
- "miniflux.app/v2/internal/model"
|
|
|
- "miniflux.app/v2/internal/reader/date"
|
|
|
"miniflux.app/v2/internal/reader/dublincore"
|
|
|
"miniflux.app/v2/internal/reader/googleplay"
|
|
|
"miniflux.app/v2/internal/reader/itunes"
|
|
|
"miniflux.app/v2/internal/reader/media"
|
|
|
- "miniflux.app/v2/internal/reader/sanitizer"
|
|
|
- "miniflux.app/v2/internal/urllib"
|
|
|
)
|
|
|
|
|
|
// Specs: https://www.rssboard.org/rss-specification
|
|
|
-type rssFeed struct {
|
|
|
- XMLName xml.Name `xml:"rss"`
|
|
|
+type RSS struct {
|
|
|
Version string `xml:"rss version,attr"`
|
|
|
- Channel rssChannel `xml:"rss channel"`
|
|
|
+ Channel RSSChannel `xml:"rss channel"`
|
|
|
}
|
|
|
|
|
|
-type rssChannel struct {
|
|
|
- Categories []string `xml:"rss category"`
|
|
|
+type RSSChannel struct {
|
|
|
Title string `xml:"rss title"`
|
|
|
Link string `xml:"rss link"`
|
|
|
- ImageURL string `xml:"rss image>url"`
|
|
|
- Language string `xml:"rss language"`
|
|
|
Description string `xml:"rss description"`
|
|
|
- PubDate string `xml:"rss pubDate"`
|
|
|
+ Language string `xml:"rss language"`
|
|
|
+ Copyright string `xml:"rss copyRight"`
|
|
|
ManagingEditor string `xml:"rss managingEditor"`
|
|
|
Webmaster string `xml:"rss webMaster"`
|
|
|
- TimeToLive rssTTL `xml:"rss ttl"`
|
|
|
- Items []rssItem `xml:"rss item"`
|
|
|
+ PubDate string `xml:"rss pubDate"`
|
|
|
+ LastBuildDate string `xml:"rss lastBuildDate"`
|
|
|
+ Categories []string `xml:"rss category"`
|
|
|
+ Generator string `xml:"rss generator"`
|
|
|
+ Docs string `xml:"rss docs"`
|
|
|
+ Cloud *RSSCloud `xml:"rss cloud"`
|
|
|
+ Image *RSSImage `xml:"rss image"`
|
|
|
+ TTL string `xml:"rss ttl"`
|
|
|
+ SkipHours []string `xml:"rss skipHours>hour"`
|
|
|
+ SkipDays []string `xml:"rss skipDays>day"`
|
|
|
+ Items []RSSItem `xml:"rss item"`
|
|
|
AtomLinks
|
|
|
- itunes.ItunesFeedElement
|
|
|
- googleplay.GooglePlayFeedElement
|
|
|
-}
|
|
|
-
|
|
|
-type rssTTL struct {
|
|
|
- Data string `xml:",chardata"`
|
|
|
-}
|
|
|
-
|
|
|
-func (r *rssTTL) Value() int {
|
|
|
- if r.Data == "" {
|
|
|
- return 0
|
|
|
- }
|
|
|
-
|
|
|
- value, err := strconv.Atoi(r.Data)
|
|
|
- if err != nil {
|
|
|
- return 0
|
|
|
- }
|
|
|
-
|
|
|
- return value
|
|
|
+ itunes.ItunesChannelElement
|
|
|
+ googleplay.GooglePlayChannelElement
|
|
|
}
|
|
|
|
|
|
-func (r *rssFeed) Transform(baseURL string) *model.Feed {
|
|
|
- var err error
|
|
|
-
|
|
|
- feed := new(model.Feed)
|
|
|
-
|
|
|
- siteURL := r.siteURL()
|
|
|
- feed.SiteURL, err = urllib.AbsoluteURL(baseURL, siteURL)
|
|
|
- if err != nil {
|
|
|
- feed.SiteURL = siteURL
|
|
|
- }
|
|
|
-
|
|
|
- feedURL := r.feedURL()
|
|
|
- feed.FeedURL, err = urllib.AbsoluteURL(baseURL, feedURL)
|
|
|
- if err != nil {
|
|
|
- feed.FeedURL = feedURL
|
|
|
- }
|
|
|
-
|
|
|
- feed.Title = html.UnescapeString(strings.TrimSpace(r.Channel.Title))
|
|
|
- if feed.Title == "" {
|
|
|
- feed.Title = feed.SiteURL
|
|
|
- }
|
|
|
-
|
|
|
- feed.IconURL = strings.TrimSpace(r.Channel.ImageURL)
|
|
|
- feed.TTL = r.Channel.TimeToLive.Value()
|
|
|
-
|
|
|
- for _, item := range r.Channel.Items {
|
|
|
- entry := item.Transform()
|
|
|
- if entry.Author == "" {
|
|
|
- entry.Author = r.feedAuthor()
|
|
|
- }
|
|
|
-
|
|
|
- if entry.URL == "" {
|
|
|
- entry.URL = feed.SiteURL
|
|
|
- } else {
|
|
|
- entryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL)
|
|
|
- if err == nil {
|
|
|
- entry.URL = entryURL
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if entry.Title == "" {
|
|
|
- entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
|
|
|
- }
|
|
|
-
|
|
|
- if entry.Title == "" {
|
|
|
- entry.Title = entry.URL
|
|
|
- }
|
|
|
-
|
|
|
- entry.Tags = append(entry.Tags, r.Channel.Categories...)
|
|
|
- entry.Tags = append(entry.Tags, r.Channel.GetItunesCategories()...)
|
|
|
-
|
|
|
- if r.Channel.GooglePlayCategory.Text != "" {
|
|
|
- entry.Tags = append(entry.Tags, r.Channel.GooglePlayCategory.Text)
|
|
|
- }
|
|
|
-
|
|
|
- feed.Entries = append(feed.Entries, entry)
|
|
|
- }
|
|
|
-
|
|
|
- return feed
|
|
|
// RSSCloud is the decoding target for the channel's <cloud> element.
// All values come from attributes of that element and are kept as raw strings.
type RSSCloud struct {
	// Domain is read from the "domain" attribute.
	Domain string `xml:"domain,attr"`

	// Port is read from the "port" attribute (not converted to a number).
	Port string `xml:"port,attr"`

	// Path is read from the "path" attribute.
	Path string `xml:"path,attr"`

	// RegisterProcedure is read from the "registerProcedure" attribute.
	RegisterProcedure string `xml:"registerProcedure,attr"`

	// Protocol is read from the "protocol" attribute.
	Protocol string `xml:"protocol,attr"`
}
|
|
|
|
|
|
-func (r *rssFeed) siteURL() string {
|
|
|
- return strings.TrimSpace(r.Channel.Link)
|
|
|
-}
|
|
|
// RSSImage is the decoding target for the channel's <image> element.
type RSSImage struct {
	// URL points to the GIF, JPEG or PNG image representing the channel.
	URL string `xml:"url"`

	// Title is the textual description of the image; it serves as the ALT
	// attribute of the HTML <img> tag when the channel is rendered in HTML.
	Title string `xml:"title"`

	// Link is the site URL; a rendered channel image links to this address.
	Link string `xml:"link"`
}
|
|
|
|
|
|
-type rssGUID struct {
|
|
|
- XMLName xml.Name
|
|
|
- Data string `xml:",chardata"`
|
|
|
- IsPermaLink string `xml:"isPermaLink,attr"`
|
|
|
+type RSSItem struct {
|
|
|
+ Title string `xml:"rss title"`
|
|
|
+ Link string `xml:"rss link"`
|
|
|
+ Description string `xml:"rss description"`
|
|
|
+ Author RSSAuthor `xml:"rss author"`
|
|
|
+ Categories []string `xml:"rss category"`
|
|
|
+ CommentsURL string `xml:"rss comments"`
|
|
|
+ Enclosures []RSSEnclosure `xml:"rss enclosure"`
|
|
|
+ GUID RSSGUID `xml:"rss guid"`
|
|
|
+ PubDate string `xml:"rss pubDate"`
|
|
|
+ Source RSSSource `xml:"rss source"`
|
|
|
+ dublincore.DublinCoreItemElement
|
|
|
+ FeedBurnerItemElement
|
|
|
+ media.MediaItemElement
|
|
|
+ AtomAuthor
|
|
|
+ AtomLinks
|
|
|
+ itunes.ItunesItemElement
|
|
|
+ googleplay.GooglePlayItemElement
|
|
|
}
|
|
|
|
|
|
-type rssAuthor struct {
|
|
|
// RSSAuthor is the decoding target for an item's <author> element.
// The same element content is captured twice: once as decoded character data
// and once as the raw inner XML.
type RSSAuthor struct {
	// XMLName records the name of the decoded element.
	XMLName xml.Name

	// Data holds the character data of the element.
	Data string `xml:",chardata"`

	// Inner holds the raw, unprocessed XML nested inside the element.
	Inner string `xml:",innerxml"`
}
|
|
|
|
|
|
-type rssEnclosure struct {
|
|
|
// RSSEnclosure is the decoding target for an item's <enclosure> element.
// All values come from attributes of that element and are kept as raw strings.
type RSSEnclosure struct {
	// URL is read from the "url" attribute.
	URL string `xml:"url,attr"`

	// Type is read from the "type" attribute.
	Type string `xml:"type,attr"`

	// Length is read from the "length" attribute; use Size for a numeric value.
	Length string `xml:"length,attr"`
}

// Size returns the enclosure length as an integer.
//
// Surrounding whitespace in the attribute is ignored. It returns 0 when the
// attribute is empty, blank, or not a valid base-10 integer that fits in an
// int64, so a malformed feed never produces a bogus size.
func (enclosure *RSSEnclosure) Size() int64 {
	length := strings.TrimSpace(enclosure.Length)
	if length == "" {
		return 0
	}

	// Parse the trimmed value with an explicit 64-bit size: bitSize 0 would
	// limit the result to the platform int, which is 32 bits on some targets.
	size, err := strconv.ParseInt(length, 10, 64)
	if err != nil {
		return 0
	}
	return size
}
|
|
|
|
|
|
-type rssItem struct {
|
|
|
- GUID rssGUID `xml:"rss guid"`
|
|
|
- Title string `xml:"rss title"`
|
|
|
- Link string `xml:"rss link"`
|
|
|
- Description string `xml:"rss description"`
|
|
|
- PubDate string `xml:"rss pubDate"`
|
|
|
- Author rssAuthor `xml:"rss author"`
|
|
|
- Comments string `xml:"rss comments"`
|
|
|
- EnclosureLinks []rssEnclosure `xml:"rss enclosure"`
|
|
|
- Categories []string `xml:"rss category"`
|
|
|
- dublincore.DublinCoreItemElement
|
|
|
- FeedBurnerElement
|
|
|
- media.Element
|
|
|
- AtomAuthor
|
|
|
- AtomLinks
|
|
|
- itunes.ItunesItemElement
|
|
|
- googleplay.GooglePlayItemElement
|
|
|
-}
|
|
|
-
|
|
|
-func (r *rssItem) Transform() *model.Entry {
|
|
|
- entry := model.NewEntry()
|
|
|
- entry.URL = r.entryURL()
|
|
|
- entry.CommentsURL = r.entryCommentsURL()
|
|
|
- entry.Date = r.entryDate()
|
|
|
- entry.Author = r.entryAuthor()
|
|
|
- entry.Hash = r.entryHash()
|
|
|
- entry.Content = r.entryContent()
|
|
|
- entry.Title = r.entryTitle()
|
|
|
- entry.Enclosures = r.entryEnclosures()
|
|
|
- entry.Tags = r.Categories
|
|
|
- if duration, err := normalizeDuration(r.ItunesDuration); err == nil {
|
|
|
- entry.ReadingTime = duration
|
|
|
- }
|
|
|
-
|
|
|
- return entry
|
|
|
-}
|
|
|
-
|
|
|
-func (r *rssItem) entryDate() time.Time {
|
|
|
- value := r.PubDate
|
|
|
- if r.DublinCoreDate != "" {
|
|
|
- value = r.DublinCoreDate
|
|
|
- }
|
|
|
-
|
|
|
- if value != "" {
|
|
|
- result, err := date.Parse(value)
|
|
|
- if err != nil {
|
|
|
- slog.Debug("Unable to parse date from RSS feed",
|
|
|
- slog.String("date", value),
|
|
|
- slog.String("guid", r.GUID.Data),
|
|
|
- slog.Any("error", err),
|
|
|
- )
|
|
|
- return time.Now()
|
|
|
- }
|
|
|
-
|
|
|
- return result
|
|
|
- }
|
|
|
-
|
|
|
- return time.Now()
|
|
|
-}
|
|
|
-
|
|
|
-func (r *rssItem) entryAuthor() string {
|
|
|
- var author string
|
|
|
-
|
|
|
- switch {
|
|
|
- case r.GooglePlayAuthor != "":
|
|
|
- author = r.GooglePlayAuthor
|
|
|
- case r.ItunesAuthor != "":
|
|
|
- author = r.ItunesAuthor
|
|
|
- case r.DublinCoreCreator != "":
|
|
|
- author = r.DublinCoreCreator
|
|
|
- case r.AtomAuthor.String() != "":
|
|
|
- author = r.AtomAuthor.String()
|
|
|
- case strings.Contains(r.Author.Inner, "<![CDATA["):
|
|
|
- author = r.Author.Data
|
|
|
- default:
|
|
|
- author = r.Author.Inner
|
|
|
- }
|
|
|
-
|
|
|
- return strings.TrimSpace(sanitizer.StripTags(author))
|
|
|
-}
|
|
|
-
|
|
|
-func (r *rssItem) entryHash() string {
|
|
|
- for _, value := range []string{r.GUID.Data, r.entryURL()} {
|
|
|
- if value != "" {
|
|
|
- return crypto.Hash(value)
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- return ""
|
|
|
-}
|
|
|
-
|
|
|
-func (r *rssItem) entryTitle() string {
|
|
|
- title := r.Title
|
|
|
-
|
|
|
- if r.DublinCoreTitle != "" {
|
|
|
- title = r.DublinCoreTitle
|
|
|
- }
|
|
|
-
|
|
|
- return html.UnescapeString(strings.TrimSpace(title))
|
|
|
-}
|
|
|
-
|
|
|
-func (r *rssItem) entryContent() string {
|
|
|
- for _, value := range []string{
|
|
|
- r.DublinCoreContent,
|
|
|
- r.Description,
|
|
|
- r.GooglePlayDescription,
|
|
|
- r.ItunesSummary,
|
|
|
- r.ItunesSubtitle,
|
|
|
- } {
|
|
|
- if value != "" {
|
|
|
- return value
|
|
|
- }
|
|
|
- }
|
|
|
- return ""
|
|
|
-}
|
|
|
-
|
|
|
-func (r *rssItem) entryURL() string {
|
|
|
- for _, link := range []string{r.FeedBurnerLink, r.Link} {
|
|
|
- if link != "" {
|
|
|
- return strings.TrimSpace(link)
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- for _, atomLink := range r.AtomLinks.Links {
|
|
|
- if atomLink.URL != "" && (strings.EqualFold(atomLink.Rel, "alternate") || atomLink.Rel == "") {
|
|
|
- return strings.TrimSpace(atomLink.URL)
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // Specs: https://cyber.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt
|
|
|
- // isPermaLink is optional, its default value is true.
|
|
|
- // If its value is false, the guid may not be assumed to be a url, or a url to anything in particular.
|
|
|
- if r.GUID.IsPermaLink == "true" || r.GUID.IsPermaLink == "" {
|
|
|
- return strings.TrimSpace(r.GUID.Data)
|
|
|
- }
|
|
|
-
|
|
|
- return ""
|
|
|
-}
|
|
|
-
|
|
|
-func (r *rssItem) entryEnclosures() model.EnclosureList {
|
|
|
- enclosures := make(model.EnclosureList, 0)
|
|
|
- duplicates := make(map[string]bool)
|
|
|
-
|
|
|
- for _, mediaThumbnail := range r.AllMediaThumbnails() {
|
|
|
- if _, found := duplicates[mediaThumbnail.URL]; !found {
|
|
|
- duplicates[mediaThumbnail.URL] = true
|
|
|
- enclosures = append(enclosures, &model.Enclosure{
|
|
|
- URL: mediaThumbnail.URL,
|
|
|
- MimeType: mediaThumbnail.MimeType(),
|
|
|
- Size: mediaThumbnail.Size(),
|
|
|
- })
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- for _, enclosure := range r.EnclosureLinks {
|
|
|
- enclosureURL := enclosure.URL
|
|
|
-
|
|
|
- if r.FeedBurnerEnclosureLink != "" {
|
|
|
- filename := path.Base(r.FeedBurnerEnclosureLink)
|
|
|
- if strings.Contains(enclosureURL, filename) {
|
|
|
- enclosureURL = r.FeedBurnerEnclosureLink
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if enclosureURL == "" {
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- if _, found := duplicates[enclosureURL]; !found {
|
|
|
- duplicates[enclosureURL] = true
|
|
|
-
|
|
|
- enclosures = append(enclosures, &model.Enclosure{
|
|
|
- URL: enclosureURL,
|
|
|
- MimeType: enclosure.Type,
|
|
|
- Size: enclosure.Size(),
|
|
|
- })
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- for _, mediaContent := range r.AllMediaContents() {
|
|
|
- if _, found := duplicates[mediaContent.URL]; !found {
|
|
|
- duplicates[mediaContent.URL] = true
|
|
|
- enclosures = append(enclosures, &model.Enclosure{
|
|
|
- URL: mediaContent.URL,
|
|
|
- MimeType: mediaContent.MimeType(),
|
|
|
- Size: mediaContent.Size(),
|
|
|
- })
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- for _, mediaPeerLink := range r.AllMediaPeerLinks() {
|
|
|
- if _, found := duplicates[mediaPeerLink.URL]; !found {
|
|
|
- duplicates[mediaPeerLink.URL] = true
|
|
|
- enclosures = append(enclosures, &model.Enclosure{
|
|
|
- URL: mediaPeerLink.URL,
|
|
|
- MimeType: mediaPeerLink.MimeType(),
|
|
|
- Size: mediaPeerLink.Size(),
|
|
|
- })
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- return enclosures
|
|
|
// RSSGUID is the decoding target for an item's <guid> element.
type RSSGUID struct {
	// Data holds the character data of the element, i.e. the identifier itself.
	Data string `xml:",chardata"`

	// IsPermaLink is the raw value of the "isPermaLink" attribute; it is empty
	// when the attribute is absent.
	IsPermaLink string `xml:"isPermaLink,attr"`
}
|
|
|
|
|
|
-func (r *rssItem) entryCommentsURL() string {
|
|
|
- commentsURL := strings.TrimSpace(r.Comments)
|
|
|
- if commentsURL != "" && urllib.IsAbsoluteURL(commentsURL) {
|
|
|
- return commentsURL
|
|
|
- }
|
|
|
-
|
|
|
- return ""
|
|
|
// RSSSource is the decoding target for an item's <source> element.
type RSSSource struct {
	// URL is read from the "url" attribute.
	URL string `xml:"url,attr"`

	// Name holds the character data of the element.
	Name string `xml:",chardata"`
}
|