Przeglądaj źródła

Parse podcast categories

Frédéric Guillot 2 lata temu
rodzic
commit
6d97f8b458

+ 11 - 0
internal/reader/itunes/itunes.go

@@ -22,6 +22,17 @@ type ItunesFeedElement struct {
 	ItunesType       string                  `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd type"`
 }
 
+func (i *ItunesFeedElement) GetItunesCategories() []string { // Flattens ItunesCategories into a list of category texts.
+	var categories []string // stays nil when ItunesCategories is empty
+	for _, category := range i.ItunesCategories {
+		categories = append(categories, category.Text)
+		if category.SubCategory != nil { // a sub-category, when set, directly follows its parent in the result
+			categories = append(categories, category.SubCategory.Text)
+		}
+	}
+	return categories
+}
+
 type ItunesItemElement struct {
 	ItunesAuthor      string             `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd author"`
 	ItunesEpisode     string             `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd episode"`

+ 92 - 18
internal/reader/rss/parser_test.go

@@ -1434,18 +1434,17 @@ func TestParseEntryWithRSSDescriptionAndMediaDescription(t *testing.T) {
 	}
 }
 
-func TestParseEntryWithCategoryAndInnerHTML(t *testing.T) {
+func TestParseFeedWithCategories(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 		<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
 		<channel>
 			<title>Example</title>
 			<link>https://example.org/</link>
-			<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
+			<category>Category 1</category>
+			<category><![CDATA[Category 2]]></category>
 			<item>
 				<title>Test</title>
 				<link>https://example.org/item</link>
-				<category>Category 1</category>
-				<category>Category 2</category>
 			</item>
 		</channel>
 		</rss>`
@@ -1459,27 +1458,99 @@ func TestParseEntryWithCategoryAndInnerHTML(t *testing.T) {
 		t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
 	}
 
-	expected := "Category 2"
-	result := feed.Entries[0].Tags[1]
-	if result != expected {
-		t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
+	expected := []string{"Category 1", "Category 2"}
+	result := feed.Entries[0].Tags
+
+	for i, tag := range result {
+		if tag != expected[i] {
+			t.Errorf("Incorrect tag, got: %q", tag)
+		}
 	}
 }
 
-func TestParseEntryWithCategoryAndCDATA(t *testing.T) {
+func TestParseEntryWithCategories(t *testing.T) { // Entry tags must be the item's categories followed by the channel's.
 	data := `<?xml version="1.0" encoding="utf-8"?>
 		<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
 		<channel>
 			<title>Example</title>
 			<link>https://example.org/</link>
-			<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
+			<category>Category 3</category>
+			<item>
+				<title>Test</title>
+				<link>https://example.org/item</link>
+				<category>Category 1</category>
+				<category><![CDATA[Category 2]]></category>
+			</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries[0].Tags) != 3 {
+		t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags)) // Fatalf, not Errorf: extra tags would make expected[i] below panic
+	}
+
+	expected := []string{"Category 1", "Category 2", "Category 3"}
+	result := feed.Entries[0].Tags
+
+	for i, tag := range result { // lengths are known to match here, so indexing expected is safe
+		if tag != expected[i] {
+			t.Errorf("Incorrect tag, got: %q", tag)
+		}
+	}
+}
+
+func TestParseFeedWithItunesCategories(t *testing.T) { // Channel-level itunes categories and sub-categories must become entry tags.
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" version="2.0">
+		<channel>
+			<title>Example</title>
+			<link>https://example.org/</link>
+			<itunes:category text="Society &amp; Culture">
+				<itunes:category text="Documentary" />
+			</itunes:category>
+			<itunes:category text="Health">
+				<itunes:category text="Mental Health" />
+			</itunes:category>
+			<item>
+				<title>Test</title>
+				<link>https://example.org/item</link>
+			</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries[0].Tags) != 4 {
+		t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags)) // Fatalf, not Errorf: extra tags would make expected[i] below panic
+	}
+
+	expected := []string{"Society & Culture", "Documentary", "Health", "Mental Health"} // each parent category is followed by its sub-category
+	result := feed.Entries[0].Tags
+
+	for i, tag := range result { // lengths are known to match here, so indexing expected is safe
+		if tag != expected[i] {
+			t.Errorf("Incorrect tag, got: %q", tag)
+		}
+	}
+}
+
+func TestParseFeedWithGooglePlayCategory(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:gplay="http://www.google.com/schemas/play-podcasts/1.0" version="2.0">
+		<channel>
+			<title>Example</title>
+			<link>https://example.org/</link>
+			<gplay:category text="Art"></gplay:category>
 			<item>
 				<title>Test</title>
 				<link>https://example.org/item</link>
-				<author>
-					by <![CDATA[Foo Bar]]>
-				</author>
-				<category>Sample Category</category>
 			</item>
 		</channel>
 		</rss>`
@@ -1493,10 +1564,13 @@ func TestParseEntryWithCategoryAndCDATA(t *testing.T) {
 		t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
 	}
 
-	expected := "Sample Category"
-	result := feed.Entries[0].Tags[0]
-	if result != expected {
-		t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
+	expected := []string{"Art"}
+	result := feed.Entries[0].Tags
+
+	for i, tag := range result {
+		if tag != expected[i] {
+			t.Errorf("Incorrect tag, got: %q", tag)
+		}
 	}
 }
 

+ 10 - 22
internal/reader/rss/rss.go

@@ -31,6 +31,7 @@ type rssFeed struct {
 }
 
 type rssChannel struct {
+	Categories     []string  `xml:"rss category"`
 	Title          string    `xml:"rss title"`
 	Link           string    `xml:"rss link"`
 	ImageURL       string    `xml:"rss image>url"`
@@ -111,6 +112,13 @@ func (r *rssFeed) Transform(baseURL string) *model.Feed {
 			entry.Title = entry.URL
 		}
 
+		entry.Tags = append(entry.Tags, r.Channel.Categories...)
+		entry.Tags = append(entry.Tags, r.Channel.GetItunesCategories()...)
+
+		if r.Channel.GooglePlayCategory.Text != "" {
+			entry.Tags = append(entry.Tags, r.Channel.GooglePlayCategory.Text)
+		}
+
 		feed.Entries = append(feed.Entries, entry)
 	}
 
@@ -165,12 +173,6 @@ type rssEnclosure struct {
 	Length string `xml:"length,attr"`
 }
 
-type rssCategory struct {
-	XMLName xml.Name
-	Data    string `xml:",chardata"`
-	Inner   string `xml:",innerxml"`
-}
-
 func (enclosure *rssEnclosure) Size() int64 {
 	if enclosure.Length == "" {
 		return 0
@@ -188,7 +190,7 @@ type rssItem struct {
 	Author         rssAuthor      `xml:"rss author"`
 	Comments       string         `xml:"rss comments"`
 	EnclosureLinks []rssEnclosure `xml:"rss enclosure"`
-	Categories     []rssCategory  `xml:"rss category"`
+	Categories     []string       `xml:"rss category"`
 	dublincore.DublinCoreItemElement
 	FeedBurnerElement
 	media.Element
@@ -208,7 +210,7 @@ func (r *rssItem) Transform() *model.Entry {
 	entry.Content = r.entryContent()
 	entry.Title = r.entryTitle()
 	entry.Enclosures = r.entryEnclosures()
-	entry.Tags = r.entryCategories()
+	entry.Tags = r.Categories
 	if duration, err := normalizeDuration(r.ItunesDuration); err == nil {
 		entry.ReadingTime = duration
 	}
@@ -383,20 +385,6 @@ func (r *rssItem) entryEnclosures() model.EnclosureList {
 	return enclosures
 }
 
-func (r *rssItem) entryCategories() []string {
-	categoryList := make([]string, 0)
-
-	for _, rssCategory := range r.Categories {
-		if strings.Contains(rssCategory.Inner, "<![CDATA[") {
-			categoryList = append(categoryList, strings.TrimSpace(rssCategory.Data))
-		} else {
-			categoryList = append(categoryList, strings.TrimSpace(rssCategory.Inner))
-		}
-	}
-
-	return categoryList
-}
-
 func (r *rssItem) entryCommentsURL() string {
 	commentsURL := strings.TrimSpace(r.Comments)
 	if commentsURL != "" && urllib.IsAbsoluteURL(commentsURL) {