hace 2 años · 4834e934f2
--- a/internal/reader/rss/adapter.go
+++ b/internal/reader/rss/adapter.go
@@ -39,7 +39,7 @@ func (r *RSSAdapter) BuildFeed(feedURL string) *model.Feed {
 
				 
			
 
				 	// Try to find the feed URL from the Atom links.
			
 
				 	for _, atomLink := range r.rss.Channel.AtomLinks.Links {
			
 
				-		atomLinkHref := strings.TrimSpace(atomLink.URL)
			
 
				+		atomLinkHref := strings.TrimSpace(atomLink.Href)
			
 
				 		if atomLinkHref != "" && atomLink.Rel == "self" {
			
 
				 			if absoluteFeedURL, err := urllib.AbsoluteURL(feedURL, atomLinkHref); err == nil {
			
 
				 				feed.FeedURL = absoluteFeedURL
			
@@ -170,8 +170,8 @@ func findEntryURL(rssItem *RSSItem) string {
 
				 	}
			
 
				 
			
 
				 	for _, atomLink := range rssItem.AtomLinks.Links {
			
 
				-		if atomLink.URL != "" && (strings.EqualFold(atomLink.Rel, "alternate") || atomLink.Rel == "") {
			
 
				-			return strings.TrimSpace(atomLink.URL)
			
 
				+		if atomLink.Href != "" && (strings.EqualFold(atomLink.Rel, "alternate") || atomLink.Rel == "") {
			
 
				+			return strings.TrimSpace(atomLink.Href)
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -233,8 +233,8 @@ func findEntryAuthor(rssItem *RSSItem) string {
 
				 		author = rssItem.ItunesAuthor
			
 
				 	case rssItem.DublinCoreCreator != "":
			
 
				 		author = rssItem.DublinCoreCreator
			
 
				-	case rssItem.AtomAuthor.String() != "":
			
 
				-		author = rssItem.AtomAuthor.String()
			
 
				+	case rssItem.AtomAuthor.PersonName() != "":
			
 
				+		author = rssItem.AtomAuthor.PersonName()
			
 
				 	case strings.Contains(rssItem.Author.Inner, "<![CDATA["):
			
 
				 		author = rssItem.Author.Data
			
 
				 	default:
			
--- a/internal/reader/rss/atom.go
+++ b/internal/reader/rss/atom.go
@@ -3,41 +3,18 @@
 
				 
			
 
				 package rss // import "miniflux.app/v2/internal/reader/rss"
			
 
				 
			
 
				-import "strings"
			
 
				+import (
			
 
				+	"miniflux.app/v2/internal/reader/atom"
			
 
				+)
			
 
				 
			
 
				 type AtomAuthor struct {
			
 
				-	Author AtomPerson `xml:"http://www.w3.org/2005/Atom author"`
			
 
				+	Author atom.AtomPerson `xml:"http://www.w3.org/2005/Atom author"`
			
 
				 }
			
 
				 
			
 
				-func (a *AtomAuthor) String() string {
			
 
				-	return a.Author.String()
			
 
				-}
			
 
				-
			
 
				-type AtomPerson struct {
			
 
				-	Name  string `xml:"name"`
			
 
				-	Email string `xml:"email"`
			
 
				-}
			
 
				-
			
 
				-func (a *AtomPerson) String() string {
			
 
				-	var name string
			
 
				-
			
 
				-	switch {
			
 
				-	case a.Name != "":
			
 
				-		name = a.Name
			
 
				-	case a.Email != "":
			
 
				-		name = a.Email
			
 
				-	}
			
 
				-
			
 
				-	return strings.TrimSpace(name)
			
 
				-}
			
 
				-
			
 
				-type AtomLink struct {
			
 
				-	URL    string `xml:"href,attr"`
			
 
				-	Type   string `xml:"type,attr"`
			
 
				-	Rel    string `xml:"rel,attr"`
			
 
				-	Length string `xml:"length,attr"`
			
 
				+func (a *AtomAuthor) PersonName() string {
			
 
				+	return a.Author.PersonName()
			
 
				 }
			
 
				 
			
 
				 type AtomLinks struct {
			
 
				-	Links []*AtomLink `xml:"http://www.w3.org/2005/Atom link"`
			
 
				+	Links []*atom.AtomLink `xml:"http://www.w3.org/2005/Atom link"`
			
 
				 }
			
--- a/internal/reader/rss/parser_test.go
+++ b/internal/reader/rss/parser_test.go
@@ -746,6 +746,106 @@ func TestParseEntryWithContentEncoded(t *testing.T) {
 
				 	}
			
 
				 }
			
 
				 
			
 
				+// https://www.rssboard.org/rss-encoding-examples
			
 
				+func TestParseEntryDescriptionWithEncodedHTMLTags(t *testing.T) {
			
 
				+	data := `<?xml version="1.0" encoding="utf-8"?>
			
 
				+		<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
			
 
				+		<channel>
			
 
				+			<title>Example</title>
			
 
				+			<link>http://example.org/</link>
			
 
				+			<item>
			
 
				+				<title>Item 1</title>
			
 
				+				<link>http://example.org/item1</link>
			
 
				+				<description>this is &lt;b&gt;bold&lt;/b&gt;</description>
			
 
				+			</item>
			
 
				+		</channel>
			
 
				+	</rss>`
			
 
				+
			
 
				+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
			
 
				+	if err != nil {
			
 
				+		t.Fatal(err)
			
 
				+	}
			
 
				+
			
 
				+	if feed.Entries[0].Content != `this is <b>bold</b>` {
			
 
				+		t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// https://www.rssboard.org/rss-encoding-examples
			
 
				+func TestParseEntryWithDescriptionWithHTMLCDATA(t *testing.T) {
			
 
				+	data := `<?xml version="1.0" encoding="utf-8"?>
			
 
				+		<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
			
 
				+		<channel>
			
 
				+			<title>Example</title>
			
 
				+			<link>http://example.org/</link>
			
 
				+			<item>
			
 
				+				<title>Item 1</title>
			
 
				+				<link>http://example.org/item1</link>
			
 
				+				<description><![CDATA[this is <b>bold</b>]]></description>
			
 
				+			</item>
			
 
				+		</channel>
			
 
				+	</rss>`
			
 
				+
			
 
				+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
			
 
				+	if err != nil {
			
 
				+		t.Fatal(err)
			
 
				+	}
			
 
				+
			
 
				+	if feed.Entries[0].Content != `this is <b>bold</b>` {
			
 
				+		t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// https://www.rssboard.org/rss-encoding-examples
			
 
				+func TestParseEntryDescriptionWithEncodingAngleBracketsInText(t *testing.T) {
			
 
				+	data := `<?xml version="1.0" encoding="utf-8"?>
			
 
				+		<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
			
 
				+		<channel>
			
 
				+			<title>Example</title>
			
 
				+			<link>http://example.org/</link>
			
 
				+			<item>
			
 
				+				<title>Item 1</title>
			
 
				+				<link>http://example.org/item1</link>
			
 
				+				<description>5 &amp;lt; 8, ticker symbol &amp;lt;BIGCO&amp;gt;</description>
			
 
				+			</item>
			
 
				+		</channel>
			
 
				+	</rss>`
			
 
				+
			
 
				+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
			
 
				+	if err != nil {
			
 
				+		t.Fatal(err)
			
 
				+	}
			
 
				+
			
 
				+	if feed.Entries[0].Content != `5 &lt; 8, ticker symbol &lt;BIGCO&gt;` {
			
 
				+		t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// https://www.rssboard.org/rss-encoding-examples
			
 
				+func TestParseEntryDescriptionWithEncodingAngleBracketsWithinCDATASection(t *testing.T) {
			
 
				+	data := `<?xml version="1.0" encoding="utf-8"?>
			
 
				+		<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
			
 
				+		<channel>
			
 
				+			<title>Example</title>
			
 
				+			<link>http://example.org/</link>
			
 
				+			<item>
			
 
				+				<title>Item 1</title>
			
 
				+				<link>http://example.org/item1</link>
			
 
				+				<description><![CDATA[5 &lt; 8, ticker symbol &lt;BIGCO&gt;]]></description>
			
 
				+			</item>
			
 
				+		</channel>
			
 
				+	</rss>`
			
 
				+
			
 
				+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
			
 
				+	if err != nil {
			
 
				+		t.Fatal(err)
			
 
				+	}
			
 
				+
			
 
				+	if feed.Entries[0].Content != `5 &lt; 8, ticker symbol &lt;BIGCO&gt;` {
			
 
				+		t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 func TestParseEntryWithFeedBurnerLink(t *testing.T) {
			
 
				 	data := `<?xml version="1.0" encoding="utf-8"?>
			
 
				 		<rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
			
--- a/internal/reader/rss/rss.go
+++ b/internal/reader/rss/rss.go
@@ -16,29 +16,75 @@ import (
 
				 
			
 
				 // Specs: https://www.rssboard.org/rss-specification
			
 
				 type RSS struct {
			
 
				-	Version string     `xml:"rss version,attr"`
			
 
				+	// Version is the version of the RSS specification.
			
 
				+	Version string `xml:"rss version,attr"`
			
 
				+
			
 
				+	// Channel is the main container for the RSS feed.
			
 
				 	Channel RSSChannel `xml:"rss channel"`
			
 
				 }
			
 
				 
			
 
				 type RSSChannel struct {
			
 
				-	Title          string    `xml:"rss title"`
			
 
				-	Link           string    `xml:"rss link"`
			
 
				-	Description    string    `xml:"rss description"`
			
 
				-	Language       string    `xml:"rss language"`
			
 
				-	Copyright      string    `xml:"rss copyRight"`
			
 
				-	ManagingEditor string    `xml:"rss managingEditor"`
			
 
				-	Webmaster      string    `xml:"rss webMaster"`
			
 
				-	PubDate        string    `xml:"rss pubDate"`
			
 
				-	LastBuildDate  string    `xml:"rss lastBuildDate"`
			
 
				-	Categories     []string  `xml:"rss category"`
			
 
				-	Generator      string    `xml:"rss generator"`
			
 
				-	Docs           string    `xml:"rss docs"`
			
 
				-	Cloud          *RSSCloud `xml:"rss cloud"`
			
 
				-	Image          *RSSImage `xml:"rss image"`
			
 
				-	TTL            string    `xml:"rss ttl"`
			
 
				-	SkipHours      []string  `xml:"rss skipHours>hour"`
			
 
				-	SkipDays       []string  `xml:"rss skipDays>day"`
			
 
				-	Items          []RSSItem `xml:"rss item"`
			
 
				+	// Title is the name of the channel.
			
 
				+	Title string `xml:"rss title"`
			
 
				+
			
 
				+	// Link is the URL to the HTML website corresponding to the channel.
			
 
				+	Link string `xml:"rss link"`
			
 
				+
			
 
				+	// Description is a phrase or sentence describing the channel.
			
 
				+	Description string `xml:"rss description"`
			
 
				+
			
 
				+	// Language is the language the channel is written in.
			
 
				+	// A list of allowable values for this element, as provided by Netscape, is here: https://www.rssboard.org/rss-language-codes.
			
 
				+	// You may also use values defined by the W3C: https://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes.
			
 
				+	Language string `xml:"rss language"`
			
 
				+
			
 
				+	// Copyright is a string indicating the copyright.
			
 
				+	Copyright string `xml:"rss copyRight"`
			
 
				+
			
 
				+	// ManagingEditor is the email address for the person responsible for editorial content.
			
 
				+	ManagingEditor string `xml:"rss managingEditor"`
			
 
				+
			
 
				+	// Webmaster is the email address for the person responsible for technical issues relating to the channel.
			
 
				+	Webmaster string `xml:"rss webMaster"`
			
 
				+
			
 
				+	// PubDate is the publication date for the content in the channel.
			
 
				+	// All date-times in RSS conform to the Date and Time Specification of RFC 822, with the exception that the year may be expressed with two characters or four characters (four preferred).
			
 
				+	PubDate string `xml:"rss pubDate"`
			
 
				+
			
 
				+	// LastBuildDate is the last time the content of the channel changed.
			
 
				+	LastBuildDate string `xml:"rss lastBuildDate"`
			
 
				+
			
 
				+	// Categories is a collection of categories to which the channel belongs.
			
 
				+	Categories []string `xml:"rss category"`
			
 
				+
			
 
				+	// Generator is a string indicating the program used to generate the channel.
			
 
				+	Generator string `xml:"rss generator"`
			
 
				+
			
 
				+	// DocumentationURL is a URL that points to the documentation for the format used in the RSS file (the <docs> element).
			
 
				+	DocumentationURL string `xml:"rss docs"`
			
 
				+
			
 
				+	// Cloud is a web service that supports the rssCloud interface which can be implemented in HTTP-POST, XML-RPC or SOAP 1.1.
			
 
				+	Cloud *RSSCloud `xml:"rss cloud"`
			
 
				+
			
 
				+	// Image specifies a GIF, JPEG or PNG image that can be displayed with the channel.
			
 
				+	Image *RSSImage `xml:"rss image"`
			
 
				+
			
 
				+	// TTL is a number of minutes that indicates how long a channel can be cached before refreshing from the source.
			
 
				+	TTL string `xml:"rss ttl"`
			
 
				+
			
 
				+	// SkipHours is a hint for aggregators telling them which hours they can skip.
			
 
				+	// An XML element that contains up to 24 <hour> sub-elements whose value is a number between 0 and 23,
			
 
				+	// representing a time in GMT, when aggregators,
			
 
				+	// if they support the feature, may not read the channel on hours listed in the skipHours element.
			
 
				+	SkipHours []string `xml:"rss skipHours>hour"`
			
 
				+
			
 
				+	// SkipDays is a hint for aggregators telling them which days they can skip.
			
 
				+	// An XML element that contains up to seven <day> sub-elements whose value is Monday, Tuesday, Wednesday, Thursday, Friday, Saturday or Sunday.
			
 
				+	SkipDays []string `xml:"rss skipDays>day"`
			
 
				+
			
 
				+	// Items is a collection of items.
			
 
				+	Items []RSSItem `xml:"rss item"`
			
 
				+
			
 
				 	AtomLinks
			
 
				 	itunes.ItunesChannelElement
			
 
				 	googleplay.GooglePlayChannelElement
			
@@ -64,16 +110,56 @@ type RSSImage struct {
 
				 }
			
 
				 
			
 
				 type RSSItem struct {
			
 
				-	Title       string         `xml:"rss title"`
			
 
				-	Link        string         `xml:"rss link"`
			
 
				-	Description string         `xml:"rss description"`
			
 
				-	Author      RSSAuthor      `xml:"rss author"`
			
 
				-	Categories  []string       `xml:"rss category"`
			
 
				-	CommentsURL string         `xml:"rss comments"`
			
 
				-	Enclosures  []RSSEnclosure `xml:"rss enclosure"`
			
 
				-	GUID        RSSGUID        `xml:"rss guid"`
			
 
				-	PubDate     string         `xml:"rss pubDate"`
			
 
				-	Source      RSSSource      `xml:"rss source"`
			
 
				+	// Title is the title of the item.
			
 
				+	Title string `xml:"rss title"`
			
 
				+
			
 
				+	// Link is the URL of the item.
			
 
				+	Link string `xml:"rss link"`
			
 
				+
			
 
				+	// Description is the item synopsis.
			
 
				+	Description string `xml:"rss description"`
			
 
				+
			
 
				+	// Author is the email address of the author of the item.
			
 
				+	Author RSSAuthor `xml:"rss author"`
			
 
				+
			
 
				+	// <category> is an optional sub-element of <item>.
			
 
				+	// It has one optional attribute, domain, a string that identifies a categorization taxonomy.
			
 
				+	Categories []string `xml:"rss category"`
			
 
				+
			
 
				+	// <comments> is an optional sub-element of <item>.
			
 
				+	// If present, it contains the URL of the comments page for the item.
			
 
				+	CommentsURL string `xml:"rss comments"`
			
 
				+
			
 
				+	// <enclosure> is an optional sub-element of <item>.
			
 
				+	// It has three required attributes. url says where the enclosure is located,
			
 
				+	// length says how big it is in bytes, and type says what its type is, a standard MIME type.
			
 
				+	Enclosures []RSSEnclosure `xml:"rss enclosure"`
			
 
				+
			
 
				+	// <guid> is an optional sub-element of <item>.
			
 
				+	// It's a string that uniquely identifies the item.
			
 
				+	// When present, an aggregator may choose to use this string to determine if an item is new.
			
 
				+	//
			
 
				+	// There are no rules for the syntax of a guid.
			
 
				+	// Aggregators must view them as a string.
			
 
				+	// It's up to the source of the feed to establish the uniqueness of the string.
			
 
				+	//
			
 
				+	// If the guid element has an attribute named isPermaLink with a value of true,
			
 
				+	// the reader may assume that it is a permalink to the item, that is, a url that can be opened in a Web browser,
			
 
				+	// that points to the full item described by the <item> element.
			
 
				+	//
			
 
				+	// isPermaLink is optional, its default value is true.
			
 
				+	// If its value is false, the guid may not be assumed to be a url, or a url to anything in particular.
			
 
				+	GUID RSSGUID `xml:"rss guid"`
			
 
				+
			
 
				+	// <pubDate> is the publication date of the item.
			
 
				+	// Its value is a string in RFC 822 format.
			
 
				+	PubDate string `xml:"rss pubDate"`
			
 
				+
			
 
				+	// <source> is an optional sub-element of <item>.
			
 
				+	// Its value is the name of the RSS channel that the item came from, derived from its <title>.
			
 
				+	// It has one required attribute, url, which contains the URL of the RSS channel.
			
 
				+	Source RSSSource `xml:"rss source"`
			
 
				+
			
 
				 	dublincore.DublinCoreItemElement
			
 
				 	FeedBurnerItemElement
			
 
				 	media.MediaItemElement