Просмотр исходного кода

test(reader): ensure consistent tags parsing across feed formats

Frédéric Guillot 9 месяцев назад
Родитель
Коммит
2e26f5ca75

+ 2 - 0
internal/reader/atom/atom_10_adapter.go

@@ -137,6 +137,8 @@ func (a *Atom10Adapter) populateEntries(siteURL string) model.Entries {
 		if len(categories) == 0 {
 			categories = a.atomFeed.Categories.CategoryNames()
 		}
+
+		// Sort and deduplicate categories.
 		sort.Strings(categories)
 		entry.Tags = slices.Compact(categories)
 

+ 19 - 17
internal/reader/atom/atom_10_test.go

@@ -1761,6 +1761,8 @@ func TestParseItemWithCategories(t *testing.T) {
 		<updated>2003-12-13T18:30:02Z</updated>
 		<summary>Some text.</summary>
 		<category term='ZZZZ' />
+		<category term='ZZZZ' />
+		<category term=" " />
 		<category term='Technology' label='Science' />
 	  </entry>
 	</feed>`
@@ -1774,16 +1776,13 @@ func TestParseItemWithCategories(t *testing.T) {
 		t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
 	}
 
-	expected := "Science"
-	result := feed.Entries[0].Tags[0]
-	if result != expected {
-		t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
-	}
+	expected := []string{"Science", "ZZZZ"}
+	result := feed.Entries[0].Tags
 
-	expected = "ZZZZ"
-	result = feed.Entries[0].Tags[1]
-	if result != expected {
-		t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
+	for i, tag := range result {
+		if tag != expected[i] {
+			t.Errorf("Incorrect entry tag, got %q instead of %q", tag, expected[i])
+		}
 	}
 }
 
@@ -1792,9 +1791,10 @@ func TestParseFeedWithCategories(t *testing.T) {
 	<feed xmlns="http://www.w3.org/2005/Atom">
 	  <title>Example Feed</title>
 	  <link href="http://example.org/"/>
-	  <category term='Test' label='Some Label' />
-	  <category term='Test' label='Some Label' />
-	  <category term='Test' label='Some Label' />
+	  <category term='C term' label='C label' />
+	  <category term='B term' label='B label' />
+	  <category term='B term' label='B label' />
+	  <category term='A term' label='A label' />
 	  <entry>
 	  	<link href="http://www.example.org/entries/1" />
 		<updated>2003-12-13T18:30:02Z</updated>
@@ -1807,14 +1807,16 @@ func TestParseFeedWithCategories(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	if len(feed.Entries[0].Tags) != 1 {
+	if len(feed.Entries[0].Tags) != 3 {
 		t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
 	}
 
-	expected := "Some Label"
-	result := feed.Entries[0].Tags[0]
-	if result != expected {
-		t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
+	expected := []string{"A label", "B label", "C label"}
+	result := feed.Entries[0].Tags
+	for i, tag := range result {
+		if tag != expected[i] {
+			t.Errorf("Incorrect entry tag, got %q instead of %q", tag, expected[i])
+		}
 	}
 }
 

+ 4 - 0
internal/reader/json/adapter.go

@@ -157,6 +157,10 @@ func (j *JSONAdapter) BuildFeed(baseURL string) *model.Feed {
 			}
 		}
 
+		// Sort and deduplicate tags.
+		slices.Sort(entry.Tags)
+		entry.Tags = slices.Compact(entry.Tags)
+
 		// Generate a hash for the entry.
 		for _, value := range []string{item.ID, item.URL, item.ContentText + item.ContentHTML + item.Summary} {
 			value = strings.TrimSpace(value)

+ 13 - 6
internal/reader/json/parser_test.go

@@ -790,7 +790,9 @@ func TestParseItemTags(t *testing.T) {
 				"tags": [
 					" tag 1",
 					" ",
-					"tag 2"
+					"tag 2",
+					"tag 2",
+					"aaa"
 				]
 			}
 		]
@@ -801,14 +803,19 @@ func TestParseItemTags(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	if len(feed.Entries[0].Tags) != 2 {
+	if len(feed.Entries) != 1 {
+		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if len(feed.Entries[0].Tags) != 3 {
 		t.Errorf("Incorrect number of Tags, got: %d", len(feed.Entries[0].Tags))
 	}
 
-	expected := "tag 2"
-	result := feed.Entries[0].Tags[1]
-	if result != expected {
-		t.Errorf("Incorrect entry tag, got %q instead of %q", result, expected)
+	expected := []string{"aaa", "tag 1", "tag 2"}
+	for i, tag := range feed.Entries[0].Tags {
+		if tag != expected[i] {
+			t.Errorf("Incorrect entry tag, got %q instead of %q", tag, expected[i])
+		}
 	}
 }
 

+ 50 - 23
internal/reader/rss/adapter.go

@@ -7,6 +7,7 @@ import (
 	"html"
 	"log/slog"
 	"path"
+	"slices"
 	"strconv"
 	"strings"
 	"time"
@@ -124,31 +125,13 @@ func (r *RSSAdapter) BuildFeed(baseURL string) *model.Feed {
 		}
 
 		// Populate entry categories.
-		for _, tag := range item.Categories {
-			if tag != "" {
-				entry.Tags = append(entry.Tags, tag)
-			}
-		}
-		for _, tag := range item.MediaCategories.Labels() {
-			if tag != "" {
-				entry.Tags = append(entry.Tags, tag)
-			}
-		}
+		entry.Tags = findEntryTags(&item)
 		if len(entry.Tags) == 0 {
-			for _, tag := range r.rss.Channel.Categories {
-				if tag != "" {
-					entry.Tags = append(entry.Tags, tag)
-				}
-			}
-			for _, tag := range r.rss.Channel.GetItunesCategories() {
-				if tag != "" {
-					entry.Tags = append(entry.Tags, tag)
-				}
-			}
-			if r.rss.Channel.GooglePlayCategory.Text != "" {
-				entry.Tags = append(entry.Tags, r.rss.Channel.GooglePlayCategory.Text)
-			}
+			entry.Tags = findFeedTags(&r.rss.Channel)
 		}
+		// Sort and deduplicate tags.
+		slices.Sort(entry.Tags)
+		entry.Tags = slices.Compact(entry.Tags)
 
 		feed.Entries = append(feed.Entries, entry)
 	}
@@ -176,6 +159,30 @@ func findFeedAuthor(rssChannel *RSSChannel) string {
 	return strings.TrimSpace(sanitizer.StripTags(author))
 }
 
+func findFeedTags(rssChannel *RSSChannel) []string { // findFeedTags gathers the channel-level tags: trimmed, non-empty values only; it does not sort or de-duplicate.
+	tags := make([]string, 0) // Starts as an empty, non-nil slice, so the result is never nil.
+
+	for _, tag := range rssChannel.Categories { // 1) Plain channel categories.
+		tag = strings.TrimSpace(tag)
+		if tag != "" { // Skip values that are empty or whitespace-only after trimming.
+			tags = append(tags, tag)
+		}
+	}
+
+	for _, tag := range rssChannel.GetItunesCategories() { // 2) Categories returned by GetItunesCategories.
+		tag = strings.TrimSpace(tag)
+		if tag != "" {
+			tags = append(tags, tag)
+		}
+	}
+
+	if tag := strings.TrimSpace(rssChannel.GooglePlayCategory.Text); tag != "" { // 3) The single Google Play category text, if not blank.
+		tags = append(tags, tag)
+	}
+
+	return tags // Returned in source order: categories, then iTunes categories, then the Google Play category.
+}
+
 func findEntryTitle(rssItem *RSSItem) string {
 	title := rssItem.Title.Content
 
@@ -270,6 +277,26 @@ func findEntryAuthor(rssItem *RSSItem) string {
 	return strings.TrimSpace(sanitizer.StripTags(author))
 }
 
+func findEntryTags(rssItem *RSSItem) []string { // findEntryTags gathers the item-level tags: trimmed, non-empty values only; it does not sort or de-duplicate.
+	tags := make([]string, 0) // Starts as an empty, non-nil slice; callers can test len(tags) == 0 to detect "no tags".
+
+	for _, tag := range rssItem.Categories { // 1) Plain item categories.
+		tag = strings.TrimSpace(tag)
+		if tag != "" { // Skip values that are empty or whitespace-only after trimming.
+			tags = append(tags, tag)
+		}
+	}
+
+	for _, tag := range rssItem.MediaCategories.Labels() { // 2) Values returned by MediaCategories.Labels().
+		tag = strings.TrimSpace(tag)
+		if tag != "" {
+			tags = append(tags, tag)
+		}
+	}
+
+	return tags // Returned in source order: item categories first, then media category labels.
+}
+
 func findEntryEnclosures(rssItem *RSSItem, siteURL string) model.EnclosureList {
 	enclosures := make(model.EnclosureList, 0)
 	duplicates := make(map[string]bool)

+ 8 - 5
internal/reader/rss/parser_test.go

@@ -1971,6 +1971,9 @@ func TestParseEntryWithCategories(t *testing.T) {
 				<link>https://example.org/item</link>
 				<category>Category 1</category>
 				<category><![CDATA[Category 2]]></category>
+				<category>Category 2</category>
+				<category>Category 0</category>
+				<category>   </category>
 			</item>
 		</channel>
 		</rss>`
@@ -1980,11 +1983,11 @@ func TestParseEntryWithCategories(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	if len(feed.Entries[0].Tags) != 2 {
+	if len(feed.Entries[0].Tags) != 3 {
 		t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
 	}
 
-	expected := []string{"Category 1", "Category 2"}
+	expected := []string{"Category 0", "Category 1", "Category 2"}
 	result := feed.Entries[0].Tags
 
 	for i, tag := range result {
@@ -2022,7 +2025,7 @@ func TestParseFeedWithItunesCategories(t *testing.T) {
 		t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
 	}
 
-	expected := []string{"Society & Culture", "Documentary", "Health", "Mental Health"}
+	expected := []string{"Documentary", "Health", "Mental Health", "Society & Culture"}
 	result := feed.Entries[0].Tags
 
 	for i, tag := range result {
@@ -2091,12 +2094,12 @@ func TestParseEntryWithMediaCategories(t *testing.T) {
 		t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
 	}
 
-	expected := []string{"Visual Art", "Ace Ventura - Pet Detective"}
+	expected := []string{"Ace Ventura - Pet Detective", "Visual Art"}
 	result := feed.Entries[0].Tags
 
 	for i, tag := range result {
 		if tag != expected[i] {
-			t.Errorf("Incorrect tag, got: %q", tag)
+			t.Errorf("Incorrect entry tag, got %q instead of %q", tag, expected[i])
 		}
 	}
 }

+ 2 - 19
internal/storage/entry.go

@@ -8,8 +8,6 @@ import (
 	"errors"
 	"fmt"
 	"log/slog"
-	"slices"
-	"strings"
 	"time"
 
 	"miniflux.app/v2/internal/crypto"
@@ -142,7 +140,7 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
 		entry.UserID,
 		entry.FeedID,
 		entry.ReadingTime,
-		pq.Array(removeEmpty(removeDuplicates(entry.Tags))),
+		pq.Array(entry.Tags),
 	).Scan(
 		&entry.ID,
 		&entry.Status,
@@ -198,7 +196,7 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
 		entry.UserID,
 		entry.FeedID,
 		entry.Hash,
-		pq.Array(removeEmpty(removeDuplicates(entry.Tags))),
+		pq.Array(entry.Tags),
 	).Scan(&entry.ID)
 
 	if err != nil {
@@ -630,21 +628,6 @@ func (s *Storage) UnshareEntry(userID int64, entryID int64) (err error) {
 	return
 }
 
-func removeDuplicates(l []string) []string {
-	slices.Sort(l)
-	return slices.Compact(l)
-}
-
-func removeEmpty(l []string) []string {
-	var finalSlice []string
-	for _, item := range l {
-		if strings.TrimSpace(item) != "" {
-			finalSlice = append(finalSlice, item)
-		}
-	}
-	return finalSlice
-}
-
 func truncateString(s string) string {
 	if len(s) > truncationLen {
 		return s[:truncationLen]